add split writer to record OAI list records responses

This commit is contained in:
Jörg Prante 2021-02-04 16:17:52 +01:00
parent 37ca0ce41f
commit 560bd890f0
15 changed files with 205 additions and 225 deletions

View file

@ -1,6 +1,6 @@
group = org.xbib group = org.xbib
name = oai name = oai
version = 2.5.2 version = 2.5.3
gradle.wrapper.version = 6.6.1 gradle.wrapper.version = 6.6.1
xbib-content.version = 2.6.2 xbib-content.version = 2.6.2

View file

@ -1,14 +0,0 @@
package org.xbib.oai.client;
import org.xbib.oai.OAIResponse;
import org.xbib.oai.exceptions.OAIException;
import java.io.Writer;
/**
* Default OAI response.
*
* Base class for client-side responses that are fed with the raw outcome of an HTTP call.
*/
public abstract class AbstractOAIResponse implements OAIResponse {
/**
* Hands the outcome of an HTTP request to this response for processing.
*
* @param message the response body as text
* @param status the HTTP status code
* @param contentType value of the {@code content-type} response header, or {@code null} if absent
* @param retryAfter value of the {@code retry-after} response header, or {@code null} if absent
* @param writer writer passed through by the caller for the implementation to use
* @throws OAIException if the implementation rejects the response
*/
public abstract void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException;
}

View file

@ -17,7 +17,6 @@ import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.Writer;
import java.net.URI; import java.net.URI;
import java.net.http.HttpClient; import java.net.http.HttpClient;
import java.net.http.HttpRequest; import java.net.http.HttpRequest;
@ -46,6 +45,8 @@ public class OAIClient {
private String userAgent; private String userAgent;
private SplitWriter splitWriter;
public OAIClient(String baseURL) { public OAIClient(String baseURL) {
this.baseURL = baseURL; this.baseURL = baseURL;
this.httpClient = HttpClient.newBuilder() this.httpClient = HttpClient.newBuilder()
@ -57,6 +58,14 @@ public class OAIClient {
this.userAgent = userAgent; this.userAgent = userAgent;
} }
/**
* Sets the split writer that {@code listRecords} hands to each list records response.
* The writer is closed when {@code listRecords} finishes.
*
* @param splitWriter the split writer, or {@code null} for none
*/
public void setSplitWriter(SplitWriter splitWriter) {
this.splitWriter = splitWriter;
}
/**
* Returns the split writer currently configured on this client.
*
* @return the split writer, or {@code null} if none has been set
*/
public SplitWriter getSplitWriter() {
return splitWriter;
}
/** /**
* This verb is used to retrieve information about a repository. * This verb is used to retrieve information about a repository.
* Some of the information returned is required as part of the OAI-PMH. * Some of the information returned is required as part of the OAI-PMH.
@ -140,25 +149,24 @@ public class OAIClient {
* attribute of "deleted" if a record matching the arguments * attribute of "deleted" if a record matching the arguments
* specified in the request has been deleted. No metadata * specified in the request has been deleted. No metadata
* will be present for records with deleted status. * will be present for records with deleted status.
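*
* @param metadataPrefix the metadata prefix to request, not sent if {@code null}
* @param set the set to harvest from, not sent if {@code null}
* @param dateTimeFormatter formatter set on the request for its date arguments, not set if {@code null}
* @param from start of the harvesting interval, may be {@code null}
* @param until end of the harvesting interval, may be {@code null}; must not be before {@code from}
* @param base if not {@code null} and both {@code from} and {@code until} are given, the interval is
*             shifted back by its own length and harvested again as long as it lies after this instant
* @param handler the metadata handler, may be {@code null}
* @param consumer if not {@code null}, receives every non-empty raw response body as a stream
* @throws IOException if closing the split writer fails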
*/ */
public void listRecords(String metadataPrefix,
String set,
DateTimeFormatter dateTimeFormatter,
Instant from,
Instant until,
Writer writer,
MetadataHandler handler) {
listRecords(metadataPrefix, set, dateTimeFormatter, from, until, null, writer, handler);
}
public void listRecords(String metadataPrefix, public void listRecords(String metadataPrefix,
String set, String set,
DateTimeFormatter dateTimeFormatter, DateTimeFormatter dateTimeFormatter,
Instant from, Instant from,
Instant until, Instant until,
Instant base, Instant base,
Writer writer, MetadataHandler handler,
MetadataHandler handler) { Consumer<InputStream> consumer) throws IOException {
do { do {
ListRecordsRequest listRecordsRequest = new ListRecordsRequest(); ListRecordsRequest listRecordsRequest = new ListRecordsRequest();
if (metadataPrefix != null) { if (metadataPrefix != null) {
@ -196,108 +204,33 @@ public class OAIClient {
.GET() .GET()
.build(); .build();
logger.log(Level.INFO, "sending " + httpRequest); logger.log(Level.INFO, "sending " + httpRequest);
HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString()); if (consumer != null) {
int status = httpResponse.statusCode();
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null);
listRecordsResponse.receivedResponse(httpResponse.body(), status, contentType, retryAfter, writer);
logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
" resumption-token = " + listRecordsResponse.getResumptionToken());
listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
} catch (NoRecordsMatchException e) {
logger.log(Level.WARNING, "no records match");
} catch (Exception e) {
logger.log(Level.SEVERE, e.getMessage(), e);
listRecordsRequest = null;
}
}
if (base != null && from != null && until != null) {
LocalDate fromLocalDate = LocalDate.ofInstant(from, ZoneOffset.UTC);
LocalDate untilLocalDate = LocalDate.ofInstant(until, ZoneOffset.UTC);
Period period = Period.between(fromLocalDate, untilLocalDate);
logger.log(Level.INFO, "from = " + fromLocalDate + " until = " + untilLocalDate + " period = " + period);
if (period.getYears() > 0 || period.getMonths() > 0 || period.getDays() > 0) {
from = LocalDateTime.ofInstant(from, ZoneOffset.UTC)
.plusYears(-period.getYears())
.plusMonths(-period.getMonths())
.plusDays(-period.getDays())
.toInstant(ZoneOffset.UTC);
until = LocalDateTime.ofInstant(until, ZoneOffset.UTC)
.plusYears(-period.getYears())
.plusMonths(-period.getMonths())
.plusDays(-period.getDays())
.toInstant(ZoneOffset.UTC);
} else {
throw new IllegalStateException("from = " + from + " until = " + until + ": period is zero");
}
}
} while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base));
}
public void listRecords(String metadataPrefix,
String set,
DateTimeFormatter dateTimeFormatter,
Instant from,
Instant until,
Consumer<InputStream> consumer) {
listRecords(metadataPrefix, set, dateTimeFormatter, from, until, null, consumer);
}
public void listRecords(String metadataPrefix,
String set,
DateTimeFormatter dateTimeFormatter,
Instant from,
Instant until,
Instant base,
Consumer<InputStream> consumer) {
do {
ListRecordsRequest listRecordsRequest = new ListRecordsRequest();
if (metadataPrefix != null) {
listRecordsRequest.setMetadataPrefix(metadataPrefix);
}
if (set != null) {
listRecordsRequest.setSet(set);
}
if (dateTimeFormatter != null) {
listRecordsRequest.setDateTimeFormatter(dateTimeFormatter);
}
if (from != null) {
listRecordsRequest.setFrom(from);
}
if (until != null) {
listRecordsRequest.setUntil(until);
}
if (from != null && until != null) {
if (until.isBefore(from)) {
throw new IllegalArgumentException("until must not be before from");
}
}
while (listRecordsRequest != null) {
try {
StringWriter sw = new StringWriter();
ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
URL.Builder url = URL.from(baseURL).mutator();
listRecordsRequest.getParams().forEach(url::queryParam);
HttpRequest httpRequest = HttpRequest.newBuilder()
.uri(URI.create(url.build().toExternalForm()))
.header("accept", "utf-8")
.header("user-agent", userAgent != null ? userAgent : "xbib OAI client")
.GET()
.build();
logger.log(Level.INFO, "sending " + httpRequest);
HttpResponse<byte[]> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofByteArray()); HttpResponse<byte[]> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofByteArray());
int status = httpResponse.statusCode(); int status = httpResponse.statusCode();
String contentType = httpResponse.headers().firstValue("content-type").orElse(null); String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null); String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null);
listRecordsResponse.receivedResponse(new String(httpResponse.body(), StandardCharsets.UTF_8), status, contentType, retryAfter, sw); String body = new String(httpResponse.body(), StandardCharsets.UTF_8);
if (consumer != null) { listRecordsResponse.receivedResponse(body, status, contentType, retryAfter, splitWriter);
consumer.accept(new ByteArrayInputStream(httpResponse.body()));
}
logger.log(Level.FINE, "response headers = " + httpResponse.headers() + logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
" resumption-token = " + listRecordsResponse.getResumptionToken()); " resumption-token = " + listRecordsResponse.getResumptionToken());
byte[] b = httpResponse.body();
if (b.length > 0) {
consumer.accept(new ByteArrayInputStream(b));
logger.log(Level.FINE, "body consumed: " + body);
}
} else {
HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
int status = httpResponse.statusCode();
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null);
listRecordsResponse.receivedResponse(httpResponse.body(), status, contentType, retryAfter, splitWriter);
logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
" resumption-token = " + listRecordsResponse.getResumptionToken());
}
listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken()); listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
} catch (NoRecordsMatchException e) { } catch (NoRecordsMatchException e) {
logger.log(Level.WARNING, "no records match"); logger.log(Level.WARNING, "no records match");
listRecordsRequest = null;
} catch (Exception e) { } catch (Exception e) {
logger.log(Level.SEVERE, e.getMessage(), e); logger.log(Level.SEVERE, e.getMessage(), e);
listRecordsRequest = null; listRecordsRequest = null;
@ -324,6 +257,9 @@ public class OAIClient {
} }
} }
} while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base)); } while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base));
if (splitWriter != null) {
splitWriter.close();;
}
} }
public IdentifyRequest resume(IdentifyRequest request, ResumptionToken<?> token) { public IdentifyRequest resume(IdentifyRequest request, ResumptionToken<?> token) {
@ -403,5 +339,4 @@ public class OAIClient {
nextRequest.setResumptionToken(token); nextRequest.setResumptionToken(token);
return nextRequest; return nextRequest;
} }
} }

View file

@ -0,0 +1,104 @@
package org.xbib.oai.client;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import java.util.zip.Deflater;
import java.util.zip.GZIPOutputStream;
/**
 * A {@link Writer} that spreads its output over a sequence of numbered files.
 *
 * File names are produced by formatting {@code fileNamePattern} with a counter that starts at
 * zero, so the pattern should contain a single integer conversion such as {@code %d}.
 * A new file is started
 * <ul>
 * <li>on the first write,</li>
 * <li>on every explicit call to {@link #split()},</li>
 * <li>on the first write after {@link #close()}, and</li>
 * <li>if {@code splitSize} is positive, on the first write after more than {@code splitSize}
 * characters have been written to the current file.</li>
 * </ul>
 * Files are only created when there is something to write or when {@link #split()} is called
 * explicitly; a size-triggered rollover never leaves an empty trailing file behind.
 *
 * All public operations are serialized by one internal lock.
 */
public class SplitWriter extends Writer implements Flushable, Closeable {

    private final String fileNamePattern;

    private final int splitSize;

    private final int bufferSize;

    private final boolean compress;

    private final AtomicInteger fileNameCounter;

    /** Characters written to the current file (only maintained when splitSize is positive). */
    private final AtomicLong splitCounter;

    /** Guards {@link #writer}; deliberately not named "lock" to avoid hiding {@code Writer.lock}. */
    private final ReentrantLock splitLock;

    /** The writer of the current file, or {@code null} if no file is open. */
    private Writer writer;

    /**
     * Creates a split writer. No file is opened until the first write or {@link #split()}.
     *
     * @param fileNamePattern {@link String#format(String, Object...)} pattern for the file names,
     *                        formatted with the zero-based file number
     * @param splitSize maximum number of characters per file before rolling over to the next
     *                  file; zero or negative disables size-based splitting
     * @param bufferSize size in bytes of the output buffer of each file, must be positive
     * @param compress {@code true} to gzip each file
     * @throws NullPointerException if {@code fileNamePattern} is {@code null}
     */
    public SplitWriter(String fileNamePattern,
                       int splitSize,
                       int bufferSize,
                       boolean compress) {
        // fail at construction time instead of at the first write
        if (fileNamePattern == null) {
            throw new NullPointerException("fileNamePattern must not be null");
        }
        this.fileNameCounter = new AtomicInteger();
        this.splitCounter = new AtomicLong();
        this.fileNamePattern = fileNamePattern;
        this.splitSize = splitSize;
        this.bufferSize = bufferSize;
        this.compress = compress;
        this.splitLock = new ReentrantLock();
    }

    /**
     * Writes characters to the current file, opening the next file first if none is open.
     * A {@code null} buffer is silently ignored.
     *
     * @param cbuf the characters to write
     * @param off offset of the first character
     * @param len number of characters to write
     * @throws IOException if opening a file or writing fails
     */
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException {
        if (cbuf == null) {
            return;
        }
        splitLock.lock();
        try {
            if (writer == null) {
                openNextFile();
            }
            writer.write(cbuf, off, len);
            if (splitSize > 0 && splitCounter.addAndGet(len) > splitSize) {
                // Roll over lazily: finish this file now, open the next one only when more
                // data arrives, so that no empty file is left behind.
                closeCurrentFile();
            }
        } finally {
            splitLock.unlock();
        }
    }

    /**
     * Closes the current file, if any. The writer stays usable: a later write or
     * {@link #split()} continues with the next file number. Calling this repeatedly is harmless.
     *
     * @throws IOException if closing the current file fails
     */
    @Override
    public void close() throws IOException {
        splitLock.lock();
        try {
            closeCurrentFile();
        } finally {
            splitLock.unlock();
        }
    }

    /**
     * Flushes the current file, if any.
     *
     * @throws IOException if flushing fails
     */
    @Override
    public void flush() throws IOException {
        splitLock.lock();
        try {
            if (writer != null) {
                writer.flush();
            }
        } finally {
            splitLock.unlock();
        }
    }

    /**
     * Closes the current file, if any, and immediately opens the next one.
     *
     * @throws IOException if closing the current file or creating the next file fails
     */
    public void split() throws IOException {
        splitLock.lock();
        try {
            closeCurrentFile();
            openNextFile();
        } finally {
            splitLock.unlock();
        }
    }

    /** Closes the current file, if any, and forgets it even if closing fails. */
    private void closeCurrentFile() throws IOException {
        Writer current = writer;
        writer = null;
        splitCounter.set(0L);
        if (current != null) {
            current.close();
        }
    }

    /** Creates (or truncates) the file with the next number and makes it the current file. */
    private void openNextFile() throws IOException {
        String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement());
        OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE,
                StandardOpenOption.TRUNCATE_EXISTING);
        try {
            OutputStream sink = compress ?
                    new CompressedOutputStream(out, bufferSize) :
                    new BufferedOutputStream(out, bufferSize);
            this.writer = new OutputStreamWriter(sink, StandardCharsets.UTF_8);
            // every file starts with a fresh character count
            splitCounter.set(0L);
        } catch (IOException | RuntimeException e) {
            // do not leak the file handle if wrapping fails, e.g. on an illegal buffer size
            try {
                out.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /** Gzip stream with sync flush enabled and the compression level set to best compression. */
    private static class CompressedOutputStream extends GZIPOutputStream {

        CompressedOutputStream(OutputStream out, int size) throws IOException {
            super(out, size, true);
            def.setLevel(Deflater.BEST_COMPRESSION);
        }
    }
}

View file

@ -1,16 +1,10 @@
package org.xbib.oai.client.getrecord; package org.xbib.oai.client.getrecord;
import org.xbib.oai.client.AbstractOAIResponse; import org.xbib.oai.OAIResponse;
import java.io.Writer;
/** /**
* *
*/ */
public class GetRecordResponse extends AbstractOAIResponse { public class GetRecordResponse implements OAIResponse {
@Override
public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) {
// not implemented yet
}
} }

View file

@ -3,7 +3,7 @@ package org.xbib.oai.client.identify;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import org.xbib.oai.client.AbstractOAIResponse; import org.xbib.oai.OAIResponse;
import org.xbib.oai.exceptions.OAIException; import org.xbib.oai.exceptions.OAIException;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
@ -22,7 +22,7 @@ import javax.xml.parsers.ParserConfigurationException;
/** /**
* *
*/ */
public class IdentifyResponse extends AbstractOAIResponse { public class IdentifyResponse implements OAIResponse {
private String repositoryName; private String repositoryName;
@ -40,7 +40,6 @@ public class IdentifyResponse extends AbstractOAIResponse {
private String compression; private String compression;
@Override
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) { public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) {
try { try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

View file

@ -1,17 +1,7 @@
package org.xbib.oai.client.listidentifiers; package org.xbib.oai.client.listidentifiers;
import org.xbib.oai.client.AbstractOAIResponse; import org.xbib.oai.OAIResponse;
import org.xbib.oai.exceptions.OAIException;
import java.io.Writer; public class ListIdentifiersResponse implements OAIResponse {
/**
*
*/
public class ListIdentifiersResponse extends AbstractOAIResponse {
@Override
public void receivedResponse(String message, int statusCode, String contentTyep, String retryAfter, Writer writer) throws OAIException {
// not implemented yet
}
} }

View file

@ -1,17 +1,7 @@
package org.xbib.oai.client.listmetadataformats; package org.xbib.oai.client.listmetadataformats;
import org.xbib.oai.client.AbstractOAIResponse; import org.xbib.oai.OAIResponse;
import org.xbib.oai.exceptions.OAIException;
import java.io.Writer; public class ListMetadataFormatsResponse implements OAIResponse {
/**
*
*/
public class ListMetadataFormatsResponse extends AbstractOAIResponse {
@Override
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException {
// not implemented yet
}
} }

View file

@ -7,7 +7,6 @@ import org.xbib.oai.util.ResumptionToken;
import org.xbib.oai.xml.MetadataHandler; import org.xbib.oai.xml.MetadataHandler;
import org.xml.sax.Attributes; import org.xml.sax.Attributes;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import org.xml.sax.ContentHandler;
import java.time.Instant; import java.time.Instant;
import java.time.LocalDate; import java.time.LocalDate;
@ -16,9 +15,6 @@ import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException; import java.time.format.DateTimeParseException;
/**
*
*/
public class ListRecordsFilterReader extends XMLFilterReader { public class ListRecordsFilterReader extends XMLFilterReader {
private final ListRecordsRequest request; private final ListRecordsRequest request;
@ -125,7 +121,7 @@ public class ListRecordsFilterReader extends XMLFilterReader {
inMetadata = false; inMetadata = false;
break; break;
case "responseDate": case "responseDate":
response.setDate(Instant.parse(content.toString().trim())); //response.setDate(Instant.parse(content.toString().trim()));
break; break;
case "resumptionToken": case "resumptionToken":
if (token != null && content != null && content.length() > 0) { if (token != null && content != null && content.length() > 0) {

View file

@ -2,7 +2,8 @@ package org.xbib.oai.client.listrecords;
import org.xbib.content.xml.transform.TransformerURIResolver; import org.xbib.content.xml.transform.TransformerURIResolver;
import org.xbib.content.xml.util.XMLUtil; import org.xbib.content.xml.util.XMLUtil;
import org.xbib.oai.client.AbstractOAIResponse; import org.xbib.oai.OAIResponse;
import org.xbib.oai.client.SplitWriter;
import org.xbib.oai.exceptions.BadVerbException; import org.xbib.oai.exceptions.BadVerbException;
import org.xbib.oai.exceptions.BadArgumentException; import org.xbib.oai.exceptions.BadArgumentException;
import org.xbib.oai.exceptions.BadResumptionTokenException; import org.xbib.oai.exceptions.BadResumptionTokenException;
@ -11,12 +12,13 @@ import org.xbib.oai.exceptions.OAIException;
import org.xbib.oai.util.ResumptionToken; import org.xbib.oai.util.ResumptionToken;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.Writer;
import java.time.Instant; import java.time.Instant;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit; import java.time.temporal.ChronoUnit;
import javax.xml.transform.Source; import javax.xml.transform.Source;
import javax.xml.transform.Transformer; import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerException;
@ -24,10 +26,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource; import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamResult;
/** public class ListRecordsResponse implements OAIResponse {
*
*/
public class ListRecordsResponse extends AbstractOAIResponse {
private final ListRecordsRequest request; private final ListRecordsRequest request;
@ -37,8 +36,6 @@ public class ListRecordsResponse extends AbstractOAIResponse {
private String error; private String error;
private Instant date;
public ListRecordsResponse(ListRecordsRequest request) { public ListRecordsResponse(ListRecordsRequest request) {
this.request = request; this.request = request;
this.retryAfterMillis = 20L * 1000L; // 20 seconds by default this.retryAfterMillis = 20L * 1000L; // 20 seconds by default
@ -57,16 +54,7 @@ public class ListRecordsResponse extends AbstractOAIResponse {
return error; return error;
} }
public void setDate(Instant date) { public void receivedResponse(String message, int status, String contentType, String retryAfter, SplitWriter splitWriter) throws OAIException {
this.date = date;
}
public Instant getDate() {
return date;
}
@Override
public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException {
if (status == 503) { if (status == 503) {
long secs = retryAfterMillis / 1000; long secs = retryAfterMillis / 1000;
if (retryAfter != null) { if (retryAfter != null) {
@ -105,11 +93,18 @@ public class ListRecordsResponse extends AbstractOAIResponse {
this.filterreader = new ListRecordsFilterReader(request, this); this.filterreader = new ListRecordsFilterReader(request, this);
if (message != null) { if (message != null) {
try { try {
// OAI does not know of doc streams.
// Each XML transformation runs on a single XML per call. To record the XML of subsequent list record calls,
// we use a file writer that can roll over XML docs and count the files by filename.
// So we effectively record the calls to the OAI server, not the result docs in the calls.
if (splitWriter != null) {
splitWriter.split();
}
TransformerFactory transformerFactory = TransformerFactory.newInstance(); TransformerFactory transformerFactory = TransformerFactory.newInstance();
transformerFactory.setURIResolver(new TransformerURIResolver("xsl")); transformerFactory.setURIResolver(new TransformerURIResolver("xsl"));
Transformer transformer = transformerFactory.newTransformer(); Transformer transformer = transformerFactory.newTransformer();
Source source = new SAXSource(filterreader, new InputSource(new StringReader(XMLUtil.sanitize(message)))); Source source = new SAXSource(filterreader, new InputSource(new StringReader(XMLUtil.sanitize(message))));
StreamResult streamResult = writer != null ? new StreamResult(writer) : new StreamResult(new StringWriter()); StreamResult streamResult = new StreamResult(splitWriter != null ? splitWriter : new StringWriter());
transformer.transform(source, streamResult); transformer.transform(source, streamResult);
if ("noRecordsMatch".equals(error)) { if ("noRecordsMatch".equals(error)) {
throw new NoRecordsMatchException("metadataPrefix=" + request.getMetadataPrefix() throw new NoRecordsMatchException("metadataPrefix=" + request.getMetadataPrefix()
@ -125,7 +120,7 @@ public class ListRecordsResponse extends AbstractOAIResponse {
} else if (error != null) { } else if (error != null) {
throw new OAIException(error); throw new OAIException(error);
} }
} catch (TransformerException t) { } catch (TransformerException | IOException t) {
throw new OAIException(t); throw new OAIException(t);
} }
} }

View file

@ -1,17 +1,6 @@
package org.xbib.oai.client.listsets; package org.xbib.oai.client.listsets;
import org.xbib.oai.client.AbstractOAIResponse; import org.xbib.oai.OAIResponse;
import org.xbib.oai.exceptions.OAIException;
import java.io.Writer; public class ListSetsResponse implements OAIResponse {
/**
*
*/
public class ListSetsResponse extends AbstractOAIResponse {
@Override
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException {
// not implemented yet
}
} }

View file

@ -6,8 +6,6 @@ import org.junit.jupiter.api.Test;
import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.xml.SimpleMetadataHandler; import org.xbib.oai.xml.SimpleMetadataHandler;
import java.io.File;
import java.io.FileWriter;
import java.time.Instant; import java.time.Instant;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
@ -21,7 +19,9 @@ class ArxivClientTest {
@Test @Test
void testListRecordsArxivWithJdkClient() throws Exception { void testListRecordsArxivWithJdkClient() throws Exception {
SplitWriter splitWriter = new SplitWriter("build/arxiv-%d.oai", -1, 8192, false);
OAIClient oaiClient = new OAIClient("http://export.arxiv.org/oai2/"); OAIClient oaiClient = new OAIClient("http://export.arxiv.org/oai2/");
oaiClient.setSplitWriter(splitWriter);
IdentifyResponse identifyResponse = oaiClient.identify(); IdentifyResponse identifyResponse = oaiClient.identify();
String granularity = identifyResponse.getGranularity(); String granularity = identifyResponse.getGranularity();
logger.log(Level.INFO, "granularity = " + granularity); logger.log(Level.INFO, "granularity = " + granularity);
@ -31,12 +31,9 @@ class ArxivClientTest {
logger.log(Level.INFO,"waiting 20 seconds"); logger.log(Level.INFO,"waiting 20 seconds");
Thread.sleep(20 * 1000L); Thread.sleep(20 * 1000L);
Handler handler = new Handler(); Handler handler = new Handler();
File file = File.createTempFile("arxiv.", ".xml");
file.deleteOnExit();
FileWriter fileWriter = new FileWriter(file);
oaiClient.listRecords("arXiv", null, oaiClient.listRecords("arXiv", null,
dateTimeFormatter, Instant.parse("2016-11-01T00:00:00Z"), Instant.parse("2016-11-02T00:00:00Z"), fileWriter, handler); dateTimeFormatter, Instant.parse("2016-11-01T00:00:00Z"), Instant.parse("2016-11-02T00:00:00Z"), null,
fileWriter.close(); handler, null);
logger.log(Level.INFO, "count = " + handler.count()); logger.log(Level.INFO, "count = " + handler.count());
assertTrue(handler.count() > 0L); assertTrue(handler.count() > 0L);
} }

View file

@ -34,7 +34,7 @@ class BundeskunsthalleTest {
writer.startDocument(); writer.startDocument();
writer.beginCollection(); writer.beginCollection();
oaiClient.listRecords("marcxml", null, oaiClient.listRecords("marcxml", null,
dateTimeFormatter, null, null, inputStream -> { dateTimeFormatter, null, null, null, null, inputStream -> {
try { try {
Marc.builder() Marc.builder()
.setInputStream(inputStream) .setInputStream(inputStream)

View file

@ -5,8 +5,6 @@ import org.junit.jupiter.api.Test;
import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.xml.SimpleMetadataHandler; import org.xbib.oai.xml.SimpleMetadataHandler;
import java.io.File;
import java.io.FileWriter;
import java.time.Instant; import java.time.Instant;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
@ -19,7 +17,20 @@ class DNBClientTest {
private static final Logger logger = Logger.getLogger(DNBClientTest.class.getName()); private static final Logger logger = Logger.getLogger(DNBClientTest.class.getName());
@Test @Test
void testBibdat() throws Exception { void testBibdatFileDump() throws Exception {
OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository");
Instant from = Instant.parse("2021-02-01T00:00:00Z");
Instant until = Instant.parse("2021-03-01T00:00:00Z");
Instant base = Instant.parse("2010-01-01T00:00:00Z");
SplitWriter splitWriter = new SplitWriter("build/dnb-bib-pica-%d.xml", -1, 8192, false);
oaiClient.setSplitWriter(splitWriter);
oaiClient.listRecords("PicaPlus-xml", "bib",
null, from, until, base,
null, null);
}
@Test
void testBibdatSimpleMetadataHandler() throws Exception {
OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository"); OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository");
IdentifyResponse identifyResponse = oaiClient.identify(); IdentifyResponse identifyResponse = oaiClient.identify();
String granularity = identifyResponse.getGranularity(); String granularity = identifyResponse.getGranularity();
@ -27,12 +38,9 @@ class DNBClientTest {
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ? DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("UTC")) : null; DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("UTC")) : null;
Handler handler = new Handler(); Handler handler = new Handler();
File file = new File("build/dnb-bib-pica.xml");
try (FileWriter fileWriter = new FileWriter(file)) {
oaiClient.listRecords("PicaPlus-xml", "bib", oaiClient.listRecords("PicaPlus-xml", "bib",
dateTimeFormatter, Instant.parse("2016-01-01T00:00:00Z"), Instant.parse("2016-01-10T00:00:00Z"), dateTimeFormatter, Instant.parse("2016-01-01T00:00:00Z"), Instant.parse("2016-01-10T00:00:00Z"), null,
fileWriter, handler); handler, null);
}
logger.log(Level.INFO, "count=" + handler.count()); logger.log(Level.INFO, "count=" + handler.count());
assertTrue(handler.count() > 0); assertTrue(handler.count() > 0);
} }
@ -43,12 +51,12 @@ class DNBClientTest {
@Override @Override
public void startDocument() { public void startDocument() {
logger.log(Level.FINE, "start doc"); logger.log(Level.INFO, "start doc");
} }
@Override @Override
public void endDocument() { public void endDocument() {
logger.log(Level.FINE, "end doc"); logger.log(Level.INFO, "end doc");
count.incrementAndGet(); count.incrementAndGet();
} }

View file

@ -4,9 +4,6 @@ import org.junit.jupiter.api.Test;
import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.xml.SimpleMetadataHandler; import org.xbib.oai.xml.SimpleMetadataHandler;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.Instant; import java.time.Instant;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
@ -29,11 +26,11 @@ class DOAJClientTest {
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ? DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null; DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null;
Handler handler = new Handler(); Handler handler = new Handler();
try (Writer writer = Files.newBufferedWriter(Paths.get("build/doaj.xml"))) { SplitWriter splitWriter = new SplitWriter("build/doaj-%d.xml", -1, 8192, false);
oaiClient.setSplitWriter(splitWriter);
oaiClient.listRecords("oai_dc", null, oaiClient.listRecords("oai_dc", null,
dateTimeFormatter,Instant.parse("2021-01-01T00:00:00Z"), Instant.parse("2021-02-01T00:00:00Z"), dateTimeFormatter,Instant.parse("2021-01-01T00:00:00Z"), Instant.parse("2021-02-01T00:00:00Z"), null,
writer, handler); handler, null);
}
logger.log(Level.INFO, "count = " + handler.count()); logger.log(Level.INFO, "count = " + handler.count());
assertTrue(handler.count() > 0); assertTrue(handler.count() > 0);
} }