add split writer to record OAI list records responses
This commit is contained in:
parent
37ca0ce41f
commit
560bd890f0
15 changed files with 205 additions and 225 deletions
|
@ -1,6 +1,6 @@
|
||||||
group = org.xbib
|
group = org.xbib
|
||||||
name = oai
|
name = oai
|
||||||
version = 2.5.2
|
version = 2.5.3
|
||||||
|
|
||||||
gradle.wrapper.version = 6.6.1
|
gradle.wrapper.version = 6.6.1
|
||||||
xbib-content.version = 2.6.2
|
xbib-content.version = 2.6.2
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
package org.xbib.oai.client;
|
|
||||||
|
|
||||||
import org.xbib.oai.OAIResponse;
|
|
||||||
import org.xbib.oai.exceptions.OAIException;
|
|
||||||
|
|
||||||
import java.io.Writer;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Default OAI response.
|
|
||||||
*/
|
|
||||||
public abstract class AbstractOAIResponse implements OAIResponse {
|
|
||||||
|
|
||||||
public abstract void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException;
|
|
||||||
}
|
|
|
@ -17,7 +17,6 @@ import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.io.Writer;
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.http.HttpClient;
|
import java.net.http.HttpClient;
|
||||||
import java.net.http.HttpRequest;
|
import java.net.http.HttpRequest;
|
||||||
|
@ -46,6 +45,8 @@ public class OAIClient {
|
||||||
|
|
||||||
private String userAgent;
|
private String userAgent;
|
||||||
|
|
||||||
|
private SplitWriter splitWriter;
|
||||||
|
|
||||||
public OAIClient(String baseURL) {
|
public OAIClient(String baseURL) {
|
||||||
this.baseURL = baseURL;
|
this.baseURL = baseURL;
|
||||||
this.httpClient = HttpClient.newBuilder()
|
this.httpClient = HttpClient.newBuilder()
|
||||||
|
@ -57,6 +58,14 @@ public class OAIClient {
|
||||||
this.userAgent = userAgent;
|
this.userAgent = userAgent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setSplitWriter(SplitWriter splitWriter) {
|
||||||
|
this.splitWriter = splitWriter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SplitWriter getSplitWriter() {
|
||||||
|
return splitWriter;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This verb is used to retrieve information about a repository.
|
* This verb is used to retrieve information about a repository.
|
||||||
* Some of the information returned is required as part of the OAI-PMH.
|
* Some of the information returned is required as part of the OAI-PMH.
|
||||||
|
@ -140,25 +149,24 @@ public class OAIClient {
|
||||||
* attribute of "deleted" if a record matching the arguments
|
* attribute of "deleted" if a record matching the arguments
|
||||||
* specified in the request has been deleted. No metadata
|
* specified in the request has been deleted. No metadata
|
||||||
* will be present for records with deleted status.
|
* will be present for records with deleted status.
|
||||||
|
*
|
||||||
|
* @param metadataPrefix
|
||||||
|
* @param set
|
||||||
|
* @param dateTimeFormatter
|
||||||
|
* @param from
|
||||||
|
* @param until
|
||||||
|
* @param base
|
||||||
|
* @param handler
|
||||||
|
* @param consumer
|
||||||
*/
|
*/
|
||||||
public void listRecords(String metadataPrefix,
|
|
||||||
String set,
|
|
||||||
DateTimeFormatter dateTimeFormatter,
|
|
||||||
Instant from,
|
|
||||||
Instant until,
|
|
||||||
Writer writer,
|
|
||||||
MetadataHandler handler) {
|
|
||||||
listRecords(metadataPrefix, set, dateTimeFormatter, from, until, null, writer, handler);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void listRecords(String metadataPrefix,
|
public void listRecords(String metadataPrefix,
|
||||||
String set,
|
String set,
|
||||||
DateTimeFormatter dateTimeFormatter,
|
DateTimeFormatter dateTimeFormatter,
|
||||||
Instant from,
|
Instant from,
|
||||||
Instant until,
|
Instant until,
|
||||||
Instant base,
|
Instant base,
|
||||||
Writer writer,
|
MetadataHandler handler,
|
||||||
MetadataHandler handler) {
|
Consumer<InputStream> consumer) throws IOException {
|
||||||
do {
|
do {
|
||||||
ListRecordsRequest listRecordsRequest = new ListRecordsRequest();
|
ListRecordsRequest listRecordsRequest = new ListRecordsRequest();
|
||||||
if (metadataPrefix != null) {
|
if (metadataPrefix != null) {
|
||||||
|
@ -196,108 +204,33 @@ public class OAIClient {
|
||||||
.GET()
|
.GET()
|
||||||
.build();
|
.build();
|
||||||
logger.log(Level.INFO, "sending " + httpRequest);
|
logger.log(Level.INFO, "sending " + httpRequest);
|
||||||
HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
|
if (consumer != null) {
|
||||||
int status = httpResponse.statusCode();
|
|
||||||
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
|
|
||||||
String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null);
|
|
||||||
listRecordsResponse.receivedResponse(httpResponse.body(), status, contentType, retryAfter, writer);
|
|
||||||
logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
|
|
||||||
" resumption-token = " + listRecordsResponse.getResumptionToken());
|
|
||||||
listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
|
|
||||||
} catch (NoRecordsMatchException e) {
|
|
||||||
logger.log(Level.WARNING, "no records match");
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.log(Level.SEVERE, e.getMessage(), e);
|
|
||||||
listRecordsRequest = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (base != null && from != null && until != null) {
|
|
||||||
LocalDate fromLocalDate = LocalDate.ofInstant(from, ZoneOffset.UTC);
|
|
||||||
LocalDate untilLocalDate = LocalDate.ofInstant(until, ZoneOffset.UTC);
|
|
||||||
Period period = Period.between(fromLocalDate, untilLocalDate);
|
|
||||||
logger.log(Level.INFO, "from = " + fromLocalDate + " until = " + untilLocalDate + " period = " + period);
|
|
||||||
if (period.getYears() > 0 || period.getMonths() > 0 || period.getDays() > 0) {
|
|
||||||
from = LocalDateTime.ofInstant(from, ZoneOffset.UTC)
|
|
||||||
.plusYears(-period.getYears())
|
|
||||||
.plusMonths(-period.getMonths())
|
|
||||||
.plusDays(-period.getDays())
|
|
||||||
.toInstant(ZoneOffset.UTC);
|
|
||||||
until = LocalDateTime.ofInstant(until, ZoneOffset.UTC)
|
|
||||||
.plusYears(-period.getYears())
|
|
||||||
.plusMonths(-period.getMonths())
|
|
||||||
.plusDays(-period.getDays())
|
|
||||||
.toInstant(ZoneOffset.UTC);
|
|
||||||
} else {
|
|
||||||
throw new IllegalStateException("from = " + from + " until = " + until + ": period is zero");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void listRecords(String metadataPrefix,
|
|
||||||
String set,
|
|
||||||
DateTimeFormatter dateTimeFormatter,
|
|
||||||
Instant from,
|
|
||||||
Instant until,
|
|
||||||
Consumer<InputStream> consumer) {
|
|
||||||
listRecords(metadataPrefix, set, dateTimeFormatter, from, until, null, consumer);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void listRecords(String metadataPrefix,
|
|
||||||
String set,
|
|
||||||
DateTimeFormatter dateTimeFormatter,
|
|
||||||
Instant from,
|
|
||||||
Instant until,
|
|
||||||
Instant base,
|
|
||||||
Consumer<InputStream> consumer) {
|
|
||||||
do {
|
|
||||||
ListRecordsRequest listRecordsRequest = new ListRecordsRequest();
|
|
||||||
if (metadataPrefix != null) {
|
|
||||||
listRecordsRequest.setMetadataPrefix(metadataPrefix);
|
|
||||||
}
|
|
||||||
if (set != null) {
|
|
||||||
listRecordsRequest.setSet(set);
|
|
||||||
}
|
|
||||||
if (dateTimeFormatter != null) {
|
|
||||||
listRecordsRequest.setDateTimeFormatter(dateTimeFormatter);
|
|
||||||
}
|
|
||||||
if (from != null) {
|
|
||||||
listRecordsRequest.setFrom(from);
|
|
||||||
}
|
|
||||||
if (until != null) {
|
|
||||||
listRecordsRequest.setUntil(until);
|
|
||||||
}
|
|
||||||
if (from != null && until != null) {
|
|
||||||
if (until.isBefore(from)) {
|
|
||||||
throw new IllegalArgumentException("until must not be before from");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (listRecordsRequest != null) {
|
|
||||||
try {
|
|
||||||
StringWriter sw = new StringWriter();
|
|
||||||
ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
|
|
||||||
URL.Builder url = URL.from(baseURL).mutator();
|
|
||||||
listRecordsRequest.getParams().forEach(url::queryParam);
|
|
||||||
HttpRequest httpRequest = HttpRequest.newBuilder()
|
|
||||||
.uri(URI.create(url.build().toExternalForm()))
|
|
||||||
.header("accept", "utf-8")
|
|
||||||
.header("user-agent", userAgent != null ? userAgent : "xbib OAI client")
|
|
||||||
.GET()
|
|
||||||
.build();
|
|
||||||
logger.log(Level.INFO, "sending " + httpRequest);
|
|
||||||
HttpResponse<byte[]> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofByteArray());
|
HttpResponse<byte[]> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofByteArray());
|
||||||
int status = httpResponse.statusCode();
|
int status = httpResponse.statusCode();
|
||||||
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
|
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
|
||||||
String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null);
|
String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null);
|
||||||
listRecordsResponse.receivedResponse(new String(httpResponse.body(), StandardCharsets.UTF_8), status, contentType, retryAfter, sw);
|
String body = new String(httpResponse.body(), StandardCharsets.UTF_8);
|
||||||
if (consumer != null) {
|
listRecordsResponse.receivedResponse(body, status, contentType, retryAfter, splitWriter);
|
||||||
consumer.accept(new ByteArrayInputStream(httpResponse.body()));
|
|
||||||
}
|
|
||||||
logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
|
logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
|
||||||
" resumption-token = " + listRecordsResponse.getResumptionToken());
|
" resumption-token = " + listRecordsResponse.getResumptionToken());
|
||||||
|
byte[] b = httpResponse.body();
|
||||||
|
if (b.length > 0) {
|
||||||
|
consumer.accept(new ByteArrayInputStream(b));
|
||||||
|
logger.log(Level.FINE, "body consumed: " + body);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
|
||||||
|
int status = httpResponse.statusCode();
|
||||||
|
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
|
||||||
|
String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null);
|
||||||
|
listRecordsResponse.receivedResponse(httpResponse.body(), status, contentType, retryAfter, splitWriter);
|
||||||
|
logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
|
||||||
|
" resumption-token = " + listRecordsResponse.getResumptionToken());
|
||||||
|
}
|
||||||
listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
|
listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
|
||||||
} catch (NoRecordsMatchException e) {
|
} catch (NoRecordsMatchException e) {
|
||||||
logger.log(Level.WARNING, "no records match");
|
logger.log(Level.WARNING, "no records match");
|
||||||
|
listRecordsRequest = null;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.log(Level.SEVERE, e.getMessage(), e);
|
logger.log(Level.SEVERE, e.getMessage(), e);
|
||||||
listRecordsRequest = null;
|
listRecordsRequest = null;
|
||||||
|
@ -324,6 +257,9 @@ public class OAIClient {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base));
|
} while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base));
|
||||||
|
if (splitWriter != null) {
|
||||||
|
splitWriter.close();;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public IdentifyRequest resume(IdentifyRequest request, ResumptionToken<?> token) {
|
public IdentifyRequest resume(IdentifyRequest request, ResumptionToken<?> token) {
|
||||||
|
@ -403,5 +339,4 @@ public class OAIClient {
|
||||||
nextRequest.setResumptionToken(token);
|
nextRequest.setResumptionToken(token);
|
||||||
return nextRequest;
|
return nextRequest;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
104
oai-client/src/main/java/org/xbib/oai/client/SplitWriter.java
Normal file
104
oai-client/src/main/java/org/xbib/oai/client/SplitWriter.java
Normal file
|
@ -0,0 +1,104 @@
|
||||||
|
package org.xbib.oai.client;
|
||||||
|
|
||||||
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.Flushable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
import java.util.zip.Deflater;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
|
/**
 * A {@link Writer} that spreads its output over a sequence of numbered files.
 *
 * <p>File names are produced by applying {@link String#format(String, Object...)} to the
 * file name pattern with a counter that starts at zero and is incremented for every new
 * file, e.g. {@code "dump-%d.xml"} yields {@code dump-0.xml}, {@code dump-1.xml}, ...
 * Existing files with the same name are truncated. All output is encoded as UTF-8 and is
 * optionally gzip-compressed.
 *
 * <p>A new file is started
 * <ul>
 *   <li>lazily, on the first write when no file is open yet,</li>
 *   <li>explicitly, whenever {@link #split()} is called,</li>
 *   <li>automatically, after a write that pushed the number of characters written to the
 *       current file above {@code splitSize} (only if {@code splitSize} is positive).</li>
 * </ul>
 *
 * <p>All operations that touch the current file are serialized by an internal lock.
 */
public class SplitWriter extends Writer implements Flushable, Closeable {

    /** Format pattern for file names; receives the file counter as its single argument. */
    private final String fileNamePattern;

    /** Character threshold for automatic roll-over; values {@code <= 0} disable it. */
    private final int splitSize;

    /** Buffer size handed to the buffered or gzip output stream. */
    private final int bufferSize;

    /** Whether the files are written gzip-compressed. */
    private final boolean compress;

    /** Number used for the next file name. */
    private final AtomicInteger fileNameCounter;

    /** Characters written to the current file so far. */
    private final AtomicLong splitCounter;

    /**
     * Guards {@link #writer}. Deliberately not named {@code lock}, which would hide the
     * inherited {@code Writer.lock} field.
     */
    private final ReentrantLock writeLock;

    /** Writer for the current file, or {@code null} if no file has been opened yet. */
    private Writer writer;

    /**
     * Creates a split writer. No file is opened until the first write or the first
     * call to {@link #split()}.
     *
     * @param fileNamePattern format pattern for file names, must accept one integer argument
     * @param splitSize number of characters after which a new file is started automatically,
     *                  or a value {@code <= 0} to split only on explicit request
     * @param bufferSize buffer size in bytes for the underlying output stream
     * @param compress {@code true} to gzip the output with best compression
     */
    public SplitWriter(String fileNamePattern,
                       int splitSize,
                       int bufferSize,
                       boolean compress) {
        this.fileNameCounter = new AtomicInteger();
        this.splitCounter = new AtomicLong();
        this.fileNamePattern = fileNamePattern;
        this.splitSize = splitSize;
        this.bufferSize = bufferSize;
        this.compress = compress;
        this.writeLock = new ReentrantLock();
    }

    /**
     * Writes characters to the current file, opening the first file if necessary, and
     * rolls over to the next file once the configured split size has been exceeded.
     * The given chunk is never divided between two files. A {@code null} buffer is ignored.
     *
     * @param cbuf characters to write, may be {@code null}
     * @param off offset of the first character to write
     * @param len number of characters to write
     * @throws IOException if opening a file or writing fails
     */
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException {
        if (cbuf == null) {
            return;
        }
        writeLock.lock();
        try {
            if (writer == null) {
                split();
            }
            writer.write(cbuf, off, len);
            if (splitSize > 0 && splitCounter.addAndGet(len) > splitSize) {
                // split() resets the counter for the new file
                split();
            }
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Closes the current file, if one is open.
     *
     * @throws IOException if closing the current file fails
     */
    @Override
    public void close() throws IOException {
        writeLock.lock();
        try {
            if (writer != null) {
                writer.close();
            }
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Flushes the current file, if one is open.
     *
     * @throws IOException if flushing the current file fails
     */
    @Override
    public void flush() throws IOException {
        writeLock.lock();
        try {
            if (writer != null) {
                writer.flush();
            }
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Closes the current file, if any, and opens the next file of the sequence.
     * The size accounting for automatic roll-over starts from zero for the new file,
     * regardless of whether the split was requested explicitly or triggered by a write.
     *
     * @throws IOException if the current file cannot be closed or the next one cannot be opened
     */
    public void split() throws IOException {
        writeLock.lock();
        try {
            if (writer != null) {
                writer.close();
            }
            // every file starts with a fresh character count; without this an explicit
            // split would carry the old count into the new file and roll it over too early
            splitCounter.set(0L);
            String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement());
            OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE,
                    StandardOpenOption.TRUNCATE_EXISTING);
            try {
                this.writer = new OutputStreamWriter(compress ?
                        new CompressedOutputStream(out, bufferSize) :
                        new BufferedOutputStream(out, bufferSize), StandardCharsets.UTF_8);
            } catch (IOException | RuntimeException e) {
                // wrapping failed (e.g. gzip header write or illegal buffer size):
                // do not leak the file handle that was just opened
                try {
                    out.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Gzip stream with sync-flush enabled and the deflater set to best compression.
     */
    private static class CompressedOutputStream extends GZIPOutputStream {

        CompressedOutputStream(OutputStream out, int size) throws IOException {
            super(out, size, true);
            def.setLevel(Deflater.BEST_COMPRESSION);
        }
    }
}
|
|
@ -1,16 +1,10 @@
|
||||||
package org.xbib.oai.client.getrecord;
|
package org.xbib.oai.client.getrecord;
|
||||||
|
|
||||||
import org.xbib.oai.client.AbstractOAIResponse;
|
import org.xbib.oai.OAIResponse;
|
||||||
|
|
||||||
import java.io.Writer;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class GetRecordResponse extends AbstractOAIResponse {
|
public class GetRecordResponse implements OAIResponse {
|
||||||
|
|
||||||
@Override
|
|
||||||
public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) {
|
|
||||||
// not implemented yet
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,7 @@ package org.xbib.oai.client.identify;
|
||||||
import org.w3c.dom.Document;
|
import org.w3c.dom.Document;
|
||||||
import org.w3c.dom.Element;
|
import org.w3c.dom.Element;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
import org.xbib.oai.client.AbstractOAIResponse;
|
import org.xbib.oai.OAIResponse;
|
||||||
import org.xbib.oai.exceptions.OAIException;
|
import org.xbib.oai.exceptions.OAIException;
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
|
@ -22,7 +22,7 @@ import javax.xml.parsers.ParserConfigurationException;
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class IdentifyResponse extends AbstractOAIResponse {
|
public class IdentifyResponse implements OAIResponse {
|
||||||
|
|
||||||
private String repositoryName;
|
private String repositoryName;
|
||||||
|
|
||||||
|
@ -40,7 +40,6 @@ public class IdentifyResponse extends AbstractOAIResponse {
|
||||||
|
|
||||||
private String compression;
|
private String compression;
|
||||||
|
|
||||||
@Override
|
|
||||||
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) {
|
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) {
|
||||||
try {
|
try {
|
||||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||||
|
|
|
@ -1,17 +1,7 @@
|
||||||
package org.xbib.oai.client.listidentifiers;
|
package org.xbib.oai.client.listidentifiers;
|
||||||
|
|
||||||
import org.xbib.oai.client.AbstractOAIResponse;
|
import org.xbib.oai.OAIResponse;
|
||||||
import org.xbib.oai.exceptions.OAIException;
|
|
||||||
|
|
||||||
import java.io.Writer;
|
public class ListIdentifiersResponse implements OAIResponse {
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class ListIdentifiersResponse extends AbstractOAIResponse {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void receivedResponse(String message, int statusCode, String contentTyep, String retryAfter, Writer writer) throws OAIException {
|
|
||||||
// not implemented yet
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,17 +1,7 @@
|
||||||
package org.xbib.oai.client.listmetadataformats;
|
package org.xbib.oai.client.listmetadataformats;
|
||||||
|
|
||||||
import org.xbib.oai.client.AbstractOAIResponse;
|
import org.xbib.oai.OAIResponse;
|
||||||
import org.xbib.oai.exceptions.OAIException;
|
|
||||||
|
|
||||||
import java.io.Writer;
|
public class ListMetadataFormatsResponse implements OAIResponse {
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class ListMetadataFormatsResponse extends AbstractOAIResponse {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException {
|
|
||||||
// not implemented yet
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,7 +7,6 @@ import org.xbib.oai.util.ResumptionToken;
|
||||||
import org.xbib.oai.xml.MetadataHandler;
|
import org.xbib.oai.xml.MetadataHandler;
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
|
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
|
@ -16,9 +15,6 @@ import java.time.ZonedDateTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.time.format.DateTimeParseException;
|
import java.time.format.DateTimeParseException;
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class ListRecordsFilterReader extends XMLFilterReader {
|
public class ListRecordsFilterReader extends XMLFilterReader {
|
||||||
|
|
||||||
private final ListRecordsRequest request;
|
private final ListRecordsRequest request;
|
||||||
|
@ -125,7 +121,7 @@ public class ListRecordsFilterReader extends XMLFilterReader {
|
||||||
inMetadata = false;
|
inMetadata = false;
|
||||||
break;
|
break;
|
||||||
case "responseDate":
|
case "responseDate":
|
||||||
response.setDate(Instant.parse(content.toString().trim()));
|
//response.setDate(Instant.parse(content.toString().trim()));
|
||||||
break;
|
break;
|
||||||
case "resumptionToken":
|
case "resumptionToken":
|
||||||
if (token != null && content != null && content.length() > 0) {
|
if (token != null && content != null && content.length() > 0) {
|
||||||
|
|
|
@ -2,7 +2,8 @@ package org.xbib.oai.client.listrecords;
|
||||||
|
|
||||||
import org.xbib.content.xml.transform.TransformerURIResolver;
|
import org.xbib.content.xml.transform.TransformerURIResolver;
|
||||||
import org.xbib.content.xml.util.XMLUtil;
|
import org.xbib.content.xml.util.XMLUtil;
|
||||||
import org.xbib.oai.client.AbstractOAIResponse;
|
import org.xbib.oai.OAIResponse;
|
||||||
|
import org.xbib.oai.client.SplitWriter;
|
||||||
import org.xbib.oai.exceptions.BadVerbException;
|
import org.xbib.oai.exceptions.BadVerbException;
|
||||||
import org.xbib.oai.exceptions.BadArgumentException;
|
import org.xbib.oai.exceptions.BadArgumentException;
|
||||||
import org.xbib.oai.exceptions.BadResumptionTokenException;
|
import org.xbib.oai.exceptions.BadResumptionTokenException;
|
||||||
|
@ -11,12 +12,13 @@ import org.xbib.oai.exceptions.OAIException;
|
||||||
import org.xbib.oai.util.ResumptionToken;
|
import org.xbib.oai.util.ResumptionToken;
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.io.Writer;
|
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.time.temporal.ChronoUnit;
|
import java.time.temporal.ChronoUnit;
|
||||||
|
|
||||||
import javax.xml.transform.Source;
|
import javax.xml.transform.Source;
|
||||||
import javax.xml.transform.Transformer;
|
import javax.xml.transform.Transformer;
|
||||||
import javax.xml.transform.TransformerException;
|
import javax.xml.transform.TransformerException;
|
||||||
|
@ -24,10 +26,7 @@ import javax.xml.transform.TransformerFactory;
|
||||||
import javax.xml.transform.sax.SAXSource;
|
import javax.xml.transform.sax.SAXSource;
|
||||||
import javax.xml.transform.stream.StreamResult;
|
import javax.xml.transform.stream.StreamResult;
|
||||||
|
|
||||||
/**
|
public class ListRecordsResponse implements OAIResponse {
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class ListRecordsResponse extends AbstractOAIResponse {
|
|
||||||
|
|
||||||
private final ListRecordsRequest request;
|
private final ListRecordsRequest request;
|
||||||
|
|
||||||
|
@ -37,8 +36,6 @@ public class ListRecordsResponse extends AbstractOAIResponse {
|
||||||
|
|
||||||
private String error;
|
private String error;
|
||||||
|
|
||||||
private Instant date;
|
|
||||||
|
|
||||||
public ListRecordsResponse(ListRecordsRequest request) {
|
public ListRecordsResponse(ListRecordsRequest request) {
|
||||||
this.request = request;
|
this.request = request;
|
||||||
this.retryAfterMillis = 20L * 1000L; // 20 seconds by default
|
this.retryAfterMillis = 20L * 1000L; // 20 seconds by default
|
||||||
|
@ -57,16 +54,7 @@ public class ListRecordsResponse extends AbstractOAIResponse {
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDate(Instant date) {
|
public void receivedResponse(String message, int status, String contentType, String retryAfter, SplitWriter splitWriter) throws OAIException {
|
||||||
this.date = date;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Instant getDate() {
|
|
||||||
return date;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException {
|
|
||||||
if (status == 503) {
|
if (status == 503) {
|
||||||
long secs = retryAfterMillis / 1000;
|
long secs = retryAfterMillis / 1000;
|
||||||
if (retryAfter != null) {
|
if (retryAfter != null) {
|
||||||
|
@ -105,11 +93,18 @@ public class ListRecordsResponse extends AbstractOAIResponse {
|
||||||
this.filterreader = new ListRecordsFilterReader(request, this);
|
this.filterreader = new ListRecordsFilterReader(request, this);
|
||||||
if (message != null) {
|
if (message != null) {
|
||||||
try {
|
try {
|
||||||
|
// OAI does not know of doc streams.
|
||||||
|
// Each XML transformation runs on a single XML per call. To record the XML of subsequent list record calls,
|
||||||
|
// we use a file writer that can roll over XML docs and count the files by filename.
|
||||||
|
// So we effectively record the calls to the OAI server, not the result docs in the calls.
|
||||||
|
if (splitWriter != null) {
|
||||||
|
splitWriter.split();
|
||||||
|
}
|
||||||
TransformerFactory transformerFactory = TransformerFactory.newInstance();
|
TransformerFactory transformerFactory = TransformerFactory.newInstance();
|
||||||
transformerFactory.setURIResolver(new TransformerURIResolver("xsl"));
|
transformerFactory.setURIResolver(new TransformerURIResolver("xsl"));
|
||||||
Transformer transformer = transformerFactory.newTransformer();
|
Transformer transformer = transformerFactory.newTransformer();
|
||||||
Source source = new SAXSource(filterreader, new InputSource(new StringReader(XMLUtil.sanitize(message))));
|
Source source = new SAXSource(filterreader, new InputSource(new StringReader(XMLUtil.sanitize(message))));
|
||||||
StreamResult streamResult = writer != null ? new StreamResult(writer) : new StreamResult(new StringWriter());
|
StreamResult streamResult = new StreamResult(splitWriter != null ? splitWriter : new StringWriter());
|
||||||
transformer.transform(source, streamResult);
|
transformer.transform(source, streamResult);
|
||||||
if ("noRecordsMatch".equals(error)) {
|
if ("noRecordsMatch".equals(error)) {
|
||||||
throw new NoRecordsMatchException("metadataPrefix=" + request.getMetadataPrefix()
|
throw new NoRecordsMatchException("metadataPrefix=" + request.getMetadataPrefix()
|
||||||
|
@ -125,7 +120,7 @@ public class ListRecordsResponse extends AbstractOAIResponse {
|
||||||
} else if (error != null) {
|
} else if (error != null) {
|
||||||
throw new OAIException(error);
|
throw new OAIException(error);
|
||||||
}
|
}
|
||||||
} catch (TransformerException t) {
|
} catch (TransformerException | IOException t) {
|
||||||
throw new OAIException(t);
|
throw new OAIException(t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,17 +1,6 @@
|
||||||
package org.xbib.oai.client.listsets;
|
package org.xbib.oai.client.listsets;
|
||||||
|
|
||||||
import org.xbib.oai.client.AbstractOAIResponse;
|
import org.xbib.oai.OAIResponse;
|
||||||
import org.xbib.oai.exceptions.OAIException;
|
|
||||||
|
|
||||||
import java.io.Writer;
|
public class ListSetsResponse implements OAIResponse {
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class ListSetsResponse extends AbstractOAIResponse {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException {
|
|
||||||
// not implemented yet
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,8 +6,6 @@ import org.junit.jupiter.api.Test;
|
||||||
import org.xbib.oai.client.identify.IdentifyResponse;
|
import org.xbib.oai.client.identify.IdentifyResponse;
|
||||||
import org.xbib.oai.xml.SimpleMetadataHandler;
|
import org.xbib.oai.xml.SimpleMetadataHandler;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
@ -21,7 +19,9 @@ class ArxivClientTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testListRecordsArxivWithJdkClient() throws Exception {
|
void testListRecordsArxivWithJdkClient() throws Exception {
|
||||||
|
SplitWriter splitWriter = new SplitWriter("build/arxiv-%d.oai", -1, 8192, false);
|
||||||
OAIClient oaiClient = new OAIClient("http://export.arxiv.org/oai2/");
|
OAIClient oaiClient = new OAIClient("http://export.arxiv.org/oai2/");
|
||||||
|
oaiClient.setSplitWriter(splitWriter);
|
||||||
IdentifyResponse identifyResponse = oaiClient.identify();
|
IdentifyResponse identifyResponse = oaiClient.identify();
|
||||||
String granularity = identifyResponse.getGranularity();
|
String granularity = identifyResponse.getGranularity();
|
||||||
logger.log(Level.INFO, "granularity = " + granularity);
|
logger.log(Level.INFO, "granularity = " + granularity);
|
||||||
|
@ -31,12 +31,9 @@ class ArxivClientTest {
|
||||||
logger.log(Level.INFO,"waiting 20 seconds");
|
logger.log(Level.INFO,"waiting 20 seconds");
|
||||||
Thread.sleep(20 * 1000L);
|
Thread.sleep(20 * 1000L);
|
||||||
Handler handler = new Handler();
|
Handler handler = new Handler();
|
||||||
File file = File.createTempFile("arxiv.", ".xml");
|
|
||||||
file.deleteOnExit();
|
|
||||||
FileWriter fileWriter = new FileWriter(file);
|
|
||||||
oaiClient.listRecords("arXiv", null,
|
oaiClient.listRecords("arXiv", null,
|
||||||
dateTimeFormatter, Instant.parse("2016-11-01T00:00:00Z"), Instant.parse("2016-11-02T00:00:00Z"), fileWriter, handler);
|
dateTimeFormatter, Instant.parse("2016-11-01T00:00:00Z"), Instant.parse("2016-11-02T00:00:00Z"), null,
|
||||||
fileWriter.close();
|
handler, null);
|
||||||
logger.log(Level.INFO, "count = " + handler.count());
|
logger.log(Level.INFO, "count = " + handler.count());
|
||||||
assertTrue(handler.count() > 0L);
|
assertTrue(handler.count() > 0L);
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ class BundeskunsthalleTest {
|
||||||
writer.startDocument();
|
writer.startDocument();
|
||||||
writer.beginCollection();
|
writer.beginCollection();
|
||||||
oaiClient.listRecords("marcxml", null,
|
oaiClient.listRecords("marcxml", null,
|
||||||
dateTimeFormatter, null, null, inputStream -> {
|
dateTimeFormatter, null, null, null, null, inputStream -> {
|
||||||
try {
|
try {
|
||||||
Marc.builder()
|
Marc.builder()
|
||||||
.setInputStream(inputStream)
|
.setInputStream(inputStream)
|
||||||
|
|
|
@ -5,8 +5,6 @@ import org.junit.jupiter.api.Test;
|
||||||
import org.xbib.oai.client.identify.IdentifyResponse;
|
import org.xbib.oai.client.identify.IdentifyResponse;
|
||||||
import org.xbib.oai.xml.SimpleMetadataHandler;
|
import org.xbib.oai.xml.SimpleMetadataHandler;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
@ -19,7 +17,20 @@ class DNBClientTest {
|
||||||
private static final Logger logger = Logger.getLogger(DNBClientTest.class.getName());
|
private static final Logger logger = Logger.getLogger(DNBClientTest.class.getName());
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testBibdat() throws Exception {
|
void testBibdatFileDump() throws Exception {
|
||||||
|
OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository");
|
||||||
|
Instant from = Instant.parse("2021-02-01T00:00:00Z");
|
||||||
|
Instant until = Instant.parse("2021-03-01T00:00:00Z");
|
||||||
|
Instant base = Instant.parse("2010-01-01T00:00:00Z");
|
||||||
|
SplitWriter splitWriter = new SplitWriter("build/dnb-bib-pica-%d.xml", -1, 8192, false);
|
||||||
|
oaiClient.setSplitWriter(splitWriter);
|
||||||
|
oaiClient.listRecords("PicaPlus-xml", "bib",
|
||||||
|
null, from, until, base,
|
||||||
|
null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testBibdatSimpleMetadataHandler() throws Exception {
|
||||||
OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository");
|
OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository");
|
||||||
IdentifyResponse identifyResponse = oaiClient.identify();
|
IdentifyResponse identifyResponse = oaiClient.identify();
|
||||||
String granularity = identifyResponse.getGranularity();
|
String granularity = identifyResponse.getGranularity();
|
||||||
|
@ -27,12 +38,9 @@ class DNBClientTest {
|
||||||
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
|
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
|
||||||
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("UTC")) : null;
|
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("UTC")) : null;
|
||||||
Handler handler = new Handler();
|
Handler handler = new Handler();
|
||||||
File file = new File("build/dnb-bib-pica.xml");
|
|
||||||
try (FileWriter fileWriter = new FileWriter(file)) {
|
|
||||||
oaiClient.listRecords("PicaPlus-xml", "bib",
|
oaiClient.listRecords("PicaPlus-xml", "bib",
|
||||||
dateTimeFormatter, Instant.parse("2016-01-01T00:00:00Z"), Instant.parse("2016-01-10T00:00:00Z"),
|
dateTimeFormatter, Instant.parse("2016-01-01T00:00:00Z"), Instant.parse("2016-01-10T00:00:00Z"), null,
|
||||||
fileWriter, handler);
|
handler, null);
|
||||||
}
|
|
||||||
logger.log(Level.INFO, "count=" + handler.count());
|
logger.log(Level.INFO, "count=" + handler.count());
|
||||||
assertTrue(handler.count() > 0);
|
assertTrue(handler.count() > 0);
|
||||||
}
|
}
|
||||||
|
@ -43,12 +51,12 @@ class DNBClientTest {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startDocument() {
|
public void startDocument() {
|
||||||
logger.log(Level.FINE, "start doc");
|
logger.log(Level.INFO, "start doc");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void endDocument() {
|
public void endDocument() {
|
||||||
logger.log(Level.FINE, "end doc");
|
logger.log(Level.INFO, "end doc");
|
||||||
count.incrementAndGet();
|
count.incrementAndGet();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,6 @@ import org.junit.jupiter.api.Test;
|
||||||
import org.xbib.oai.client.identify.IdentifyResponse;
|
import org.xbib.oai.client.identify.IdentifyResponse;
|
||||||
import org.xbib.oai.xml.SimpleMetadataHandler;
|
import org.xbib.oai.xml.SimpleMetadataHandler;
|
||||||
|
|
||||||
import java.io.Writer;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
@ -29,11 +26,11 @@ class DOAJClientTest {
|
||||||
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
|
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
|
||||||
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null;
|
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null;
|
||||||
Handler handler = new Handler();
|
Handler handler = new Handler();
|
||||||
try (Writer writer = Files.newBufferedWriter(Paths.get("build/doaj.xml"))) {
|
SplitWriter splitWriter = new SplitWriter("build/doaj-%d.xml", -1, 8192, false);
|
||||||
|
oaiClient.setSplitWriter(splitWriter);
|
||||||
oaiClient.listRecords("oai_dc", null,
|
oaiClient.listRecords("oai_dc", null,
|
||||||
dateTimeFormatter,Instant.parse("2021-01-01T00:00:00Z"), Instant.parse("2021-02-01T00:00:00Z"),
|
dateTimeFormatter,Instant.parse("2021-01-01T00:00:00Z"), Instant.parse("2021-02-01T00:00:00Z"), null,
|
||||||
writer, handler);
|
handler, null);
|
||||||
}
|
|
||||||
logger.log(Level.INFO, "count = " + handler.count());
|
logger.log(Level.INFO, "count = " + handler.count());
|
||||||
assertTrue(handler.count() > 0);
|
assertTrue(handler.count() > 0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue