From 560bd890f0dd0d491db5cf23a5bd84863e36198d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=CC=88rg=20Prante?= Date: Thu, 4 Feb 2021 16:17:52 +0100 Subject: [PATCH] add split writer to record OAI list records responses --- gradle.properties | 2 +- .../xbib/oai/client/AbstractOAIResponse.java | 14 -- .../java/org/xbib/oai/client/OAIClient.java | 157 +++++------------- .../java/org/xbib/oai/client/SplitWriter.java | 104 ++++++++++++ .../client/getrecord/GetRecordResponse.java | 10 +- .../oai/client/identify/IdentifyResponse.java | 5 +- .../ListIdentifiersResponse.java | 14 +- .../ListMetadataFormatsResponse.java | 14 +- .../listrecords/ListRecordsFilterReader.java | 6 +- .../listrecords/ListRecordsResponse.java | 35 ++-- .../oai/client/listsets/ListSetsResponse.java | 15 +- .../org/xbib/oai/client/ArxivClientTest.java | 11 +- .../xbib/oai/client/BundeskunsthalleTest.java | 2 +- .../org/xbib/oai/client/DNBClientTest.java | 28 ++-- .../org/xbib/oai/client/DOAJClientTest.java | 13 +- 15 files changed, 205 insertions(+), 225 deletions(-) delete mode 100644 oai-client/src/main/java/org/xbib/oai/client/AbstractOAIResponse.java create mode 100644 oai-client/src/main/java/org/xbib/oai/client/SplitWriter.java diff --git a/gradle.properties b/gradle.properties index c6c6ed8..9c3c27f 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,6 +1,6 @@ group = org.xbib name = oai -version = 2.5.2 +version = 2.5.3 gradle.wrapper.version = 6.6.1 xbib-content.version = 2.6.2 diff --git a/oai-client/src/main/java/org/xbib/oai/client/AbstractOAIResponse.java b/oai-client/src/main/java/org/xbib/oai/client/AbstractOAIResponse.java deleted file mode 100644 index ccd8deb..0000000 --- a/oai-client/src/main/java/org/xbib/oai/client/AbstractOAIResponse.java +++ /dev/null @@ -1,14 +0,0 @@ -package org.xbib.oai.client; - -import org.xbib.oai.OAIResponse; -import org.xbib.oai.exceptions.OAIException; - -import java.io.Writer; - -/** - * Default OAI response. - */ -public abstract class AbstractOAIResponse implements OAIResponse { - - public abstract void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException; -} diff --git a/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java b/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java index ac8da82..04e9288 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java +++ b/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java @@ -17,7 +17,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; -import java.io.Writer; import java.net.URI; import java.net.http.HttpClient; import java.net.http.HttpRequest; @@ -46,6 +45,8 @@ public class OAIClient { private String userAgent; + private SplitWriter splitWriter; + public OAIClient(String baseURL) { this.baseURL = baseURL; this.httpClient = HttpClient.newBuilder() @@ -57,6 +58,14 @@ public class OAIClient { this.userAgent = userAgent; } + public void setSplitWriter(SplitWriter splitWriter) { + this.splitWriter = splitWriter; + } + + public SplitWriter getSplitWriter() { + return splitWriter; + } + /** * This verb is used to retrieve information about a repository. * Some of the information returned is required as part of the OAI-PMH. @@ -140,25 +149,24 @@ public class OAIClient { * attribute of "deleted" if a record matching the arguments * specified in the request has been deleted. No metadata * will be present for records with deleted status. + * + * @param metadataPrefix + * @param set + * @param dateTimeFormatter + * @param from + * @param until + * @param base + * @param handler + * @param consumer */ - public void listRecords(String metadataPrefix, - String set, - DateTimeFormatter dateTimeFormatter, - Instant from, - Instant until, - Writer writer, - MetadataHandler handler) { - listRecords(metadataPrefix, set, dateTimeFormatter, from, until, null, writer, handler); - } - public void listRecords(String metadataPrefix, String set, DateTimeFormatter dateTimeFormatter, Instant from, Instant until, Instant base, - Writer writer, - MetadataHandler handler) { + MetadataHandler handler, + Consumer consumer) throws IOException { do { ListRecordsRequest listRecordsRequest = new ListRecordsRequest(); if (metadataPrefix != null) { @@ -196,108 +204,33 @@ public class OAIClient { .GET() .build(); logger.log(Level.INFO, "sending " + httpRequest); - HttpResponse httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString()); - int status = httpResponse.statusCode(); - String contentType = httpResponse.headers().firstValue("content-type").orElse(null); - String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null); - listRecordsResponse.receivedResponse(httpResponse.body(), status, contentType, retryAfter, writer); - logger.log(Level.FINE, "response headers = " + httpResponse.headers() + - " resumption-token = " + listRecordsResponse.getResumptionToken()); - listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken()); - } catch (NoRecordsMatchException e) { - logger.log(Level.WARNING, "no records match"); - } catch (Exception e) { - logger.log(Level.SEVERE, e.getMessage(), e); - listRecordsRequest = null; - } - } - if (base != null && from != null && until != null) { - LocalDate fromLocalDate = LocalDate.ofInstant(from, ZoneOffset.UTC); - LocalDate untilLocalDate = LocalDate.ofInstant(until, ZoneOffset.UTC); - Period period = Period.between(fromLocalDate, untilLocalDate); - logger.log(Level.INFO, "from = " + fromLocalDate + " until = " + untilLocalDate + " period = " + period); - if (period.getYears() > 0 || period.getMonths() > 0 || period.getDays() > 0) { - from = LocalDateTime.ofInstant(from, ZoneOffset.UTC) - .plusYears(-period.getYears()) - .plusMonths(-period.getMonths()) - .plusDays(-period.getDays()) - .toInstant(ZoneOffset.UTC); - until = LocalDateTime.ofInstant(until, ZoneOffset.UTC) - .plusYears(-period.getYears()) - .plusMonths(-period.getMonths()) - .plusDays(-period.getDays()) - .toInstant(ZoneOffset.UTC); - } else { - throw new IllegalStateException("from = " + from + " until = " + until + ": period is zero"); - } - } - } while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base)); - } - - public void listRecords(String metadataPrefix, - String set, - DateTimeFormatter dateTimeFormatter, - Instant from, - Instant until, - Consumer consumer) { - listRecords(metadataPrefix, set, dateTimeFormatter, from, until, null, consumer); - } - - public void listRecords(String metadataPrefix, - String set, - DateTimeFormatter dateTimeFormatter, - Instant from, - Instant until, - Instant base, - Consumer consumer) { - do { - ListRecordsRequest listRecordsRequest = new ListRecordsRequest(); - if (metadataPrefix != null) { - listRecordsRequest.setMetadataPrefix(metadataPrefix); - } - if (set != null) { - listRecordsRequest.setSet(set); - } - if (dateTimeFormatter != null) { - listRecordsRequest.setDateTimeFormatter(dateTimeFormatter); - } - if (from != null) { - listRecordsRequest.setFrom(from); - } - if (until != null) { - listRecordsRequest.setUntil(until); - } - if (from != null && until != null) { - if (until.isBefore(from)) { - throw new IllegalArgumentException("until must not be before from"); - } - } - while (listRecordsRequest != null) { - try { - StringWriter sw = new StringWriter(); - ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest); - URL.Builder url = URL.from(baseURL).mutator(); - listRecordsRequest.getParams().forEach(url::queryParam); - HttpRequest httpRequest = HttpRequest.newBuilder() - .uri(URI.create(url.build().toExternalForm())) - .header("accept", "utf-8") - .header("user-agent", userAgent != null ? userAgent : "xbib OAI client") - .GET() - .build(); - logger.log(Level.INFO, "sending " + httpRequest); - HttpResponse httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofByteArray()); - int status = httpResponse.statusCode(); - String contentType = httpResponse.headers().firstValue("content-type").orElse(null); - String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null); - listRecordsResponse.receivedResponse(new String(httpResponse.body(), StandardCharsets.UTF_8), status, contentType, retryAfter, sw); if (consumer != null) { - consumer.accept(new ByteArrayInputStream(httpResponse.body())); + HttpResponse httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofByteArray()); + int status = httpResponse.statusCode(); + String contentType = httpResponse.headers().firstValue("content-type").orElse(null); + String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null); + String body = new String(httpResponse.body(), StandardCharsets.UTF_8); + listRecordsResponse.receivedResponse(body, status, contentType, retryAfter, splitWriter); + logger.log(Level.FINE, "response headers = " + httpResponse.headers() + + " resumption-token = " + listRecordsResponse.getResumptionToken()); + byte[] b = httpResponse.body(); + if (b.length > 0) { + consumer.accept(new ByteArrayInputStream(b)); + logger.log(Level.FINE, "body consumed: " + body); + } + } else { + HttpResponse httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString()); + int status = httpResponse.statusCode(); + String contentType = httpResponse.headers().firstValue("content-type").orElse(null); + String retryAfter = httpResponse.headers().firstValue("retry-after").orElse(null); + listRecordsResponse.receivedResponse(httpResponse.body(), status, contentType, retryAfter, splitWriter); + logger.log(Level.FINE, "response headers = " + httpResponse.headers() + + " resumption-token = " + listRecordsResponse.getResumptionToken()); } - logger.log(Level.FINE, "response headers = " + httpResponse.headers() + - " resumption-token = " + listRecordsResponse.getResumptionToken()); listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken()); } catch (NoRecordsMatchException e) { logger.log(Level.WARNING, "no records match"); + listRecordsRequest = null; } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage(), e); listRecordsRequest = null; @@ -324,6 +257,9 @@ public class OAIClient { } } } while (base != null && from != null && until != null && from.isAfter(base) && until.isAfter(base)); + if (splitWriter != null) { + splitWriter.close();; + } } public IdentifyRequest resume(IdentifyRequest request, ResumptionToken token) { @@ -403,5 +339,4 @@ public class OAIClient { nextRequest.setResumptionToken(token); return nextRequest; } - } diff --git a/oai-client/src/main/java/org/xbib/oai/client/SplitWriter.java b/oai-client/src/main/java/org/xbib/oai/client/SplitWriter.java new file mode 100644 index 0000000..9a13d86 --- /dev/null +++ b/oai-client/src/main/java/org/xbib/oai/client/SplitWriter.java @@ -0,0 +1,104 @@ +package org.xbib.oai.client; + +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.Flushable; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; +import java.util.zip.Deflater; +import java.util.zip.GZIPOutputStream; + +public class SplitWriter extends Writer implements Flushable, Closeable { + + private final String fileNamePattern; + + private final int splitSize; + + private final int bufferSize; + + private final boolean compress; + + private final AtomicInteger fileNameCounter; + + private final AtomicLong splitCounter; + + private final ReentrantLock lock; + + private Writer writer; + + public SplitWriter(String fileNamePattern, + int splitSize, + int bufferSize, + boolean compress) { + this.fileNameCounter = new AtomicInteger(); + this.splitCounter = new AtomicLong(); + this.fileNamePattern = fileNamePattern; + this.splitSize = splitSize; + this.bufferSize = bufferSize; + this.compress = compress; + this.lock = new ReentrantLock(); + } + + @Override + public void write(char[] cbuf, int off, int len) throws IOException { + if (cbuf == null) { + return; + } + lock.lock(); + try { + if (writer == null) { + split(); + } + writer.write(cbuf, off, len); + if (splitSize > 0 && splitCounter.addAndGet(len) > splitSize) { + splitCounter.set(0L); + split(); + } + } finally { + lock.unlock(); + } + } + + @Override + public void close() throws IOException { + if (writer != null) { + writer.close(); + } + } + + @Override + public void flush() throws IOException { + if (writer != null) { + writer.flush(); + } + } + + public void split() throws IOException { + if (writer != null) { + writer.close(); + } + String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement()); + OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING); + this.writer = new OutputStreamWriter(compress ? + new CompressedOutputStream(out, bufferSize) : + new BufferedOutputStream(out, bufferSize), StandardCharsets.UTF_8); + } + + private static class CompressedOutputStream extends GZIPOutputStream { + + CompressedOutputStream(OutputStream out, int size) throws IOException { + super(out, size, true); + def.setLevel(Deflater.BEST_COMPRESSION); + } + } +} diff --git a/oai-client/src/main/java/org/xbib/oai/client/getrecord/GetRecordResponse.java b/oai-client/src/main/java/org/xbib/oai/client/getrecord/GetRecordResponse.java index d94866c..f6db907 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/getrecord/GetRecordResponse.java +++ b/oai-client/src/main/java/org/xbib/oai/client/getrecord/GetRecordResponse.java @@ -1,16 +1,10 @@ package org.xbib.oai.client.getrecord; -import org.xbib.oai.client.AbstractOAIResponse; - -import java.io.Writer; +import org.xbib.oai.OAIResponse; /** * */ -public class GetRecordResponse extends AbstractOAIResponse { +public class GetRecordResponse implements OAIResponse { - @Override - public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) { - // not implemented yet - } } diff --git a/oai-client/src/main/java/org/xbib/oai/client/identify/IdentifyResponse.java b/oai-client/src/main/java/org/xbib/oai/client/identify/IdentifyResponse.java index 7a120db..3d84730 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/identify/IdentifyResponse.java +++ b/oai-client/src/main/java/org/xbib/oai/client/identify/IdentifyResponse.java @@ -3,7 +3,7 @@ package org.xbib.oai.client.identify; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; -import org.xbib.oai.client.AbstractOAIResponse; +import org.xbib.oai.OAIResponse; import org.xbib.oai.exceptions.OAIException; import org.xml.sax.InputSource; import org.xml.sax.SAXException; @@ -22,7 +22,7 @@ import javax.xml.parsers.ParserConfigurationException; /** * */ -public class IdentifyResponse extends AbstractOAIResponse { +public class IdentifyResponse implements OAIResponse { private String repositoryName; @@ -40,7 +40,6 @@ public class IdentifyResponse extends AbstractOAIResponse { private String compression; - @Override public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) { try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); diff --git a/oai-client/src/main/java/org/xbib/oai/client/listidentifiers/ListIdentifiersResponse.java b/oai-client/src/main/java/org/xbib/oai/client/listidentifiers/ListIdentifiersResponse.java index cfbba13..8665aa8 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/listidentifiers/ListIdentifiersResponse.java +++ b/oai-client/src/main/java/org/xbib/oai/client/listidentifiers/ListIdentifiersResponse.java @@ -1,17 +1,7 @@ package org.xbib.oai.client.listidentifiers; -import org.xbib.oai.client.AbstractOAIResponse; -import org.xbib.oai.exceptions.OAIException; +import org.xbib.oai.OAIResponse; -import java.io.Writer; +public class ListIdentifiersResponse implements OAIResponse { -/** - * - */ -public class ListIdentifiersResponse extends AbstractOAIResponse { - - @Override - public void receivedResponse(String message, int statusCode, String contentTyep, String retryAfter, Writer writer) throws OAIException { - // not implemented yet - } } diff --git a/oai-client/src/main/java/org/xbib/oai/client/listmetadataformats/ListMetadataFormatsResponse.java b/oai-client/src/main/java/org/xbib/oai/client/listmetadataformats/ListMetadataFormatsResponse.java index cc01712..891e027 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/listmetadataformats/ListMetadataFormatsResponse.java +++ b/oai-client/src/main/java/org/xbib/oai/client/listmetadataformats/ListMetadataFormatsResponse.java @@ -1,17 +1,7 @@ package org.xbib.oai.client.listmetadataformats; -import org.xbib.oai.client.AbstractOAIResponse; -import org.xbib.oai.exceptions.OAIException; +import org.xbib.oai.OAIResponse; -import java.io.Writer; +public class ListMetadataFormatsResponse implements OAIResponse { -/** - * - */ -public class ListMetadataFormatsResponse extends AbstractOAIResponse { - - @Override - public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException { - // not implemented yet - } } diff --git a/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsFilterReader.java b/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsFilterReader.java index 6d8d3b7..6cdfd10 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsFilterReader.java +++ b/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsFilterReader.java @@ -7,7 +7,6 @@ import org.xbib.oai.util.ResumptionToken; import org.xbib.oai.xml.MetadataHandler; import org.xml.sax.Attributes; import org.xml.sax.SAXException; -import org.xml.sax.ContentHandler; import java.time.Instant; import java.time.LocalDate; @@ -16,9 +15,6 @@ import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; -/** - * - */ public class ListRecordsFilterReader extends XMLFilterReader { private final ListRecordsRequest request; @@ -125,7 +121,7 @@ public class ListRecordsFilterReader extends XMLFilterReader { inMetadata = false; break; case "responseDate": - response.setDate(Instant.parse(content.toString().trim())); + //response.setDate(Instant.parse(content.toString().trim())); break; case "resumptionToken": if (token != null && content != null && content.length() > 0) { diff --git a/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsResponse.java b/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsResponse.java index 0be3a0a..4ff42a4 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsResponse.java +++ b/oai-client/src/main/java/org/xbib/oai/client/listrecords/ListRecordsResponse.java @@ -2,7 +2,8 @@ package org.xbib.oai.client.listrecords; import org.xbib.content.xml.transform.TransformerURIResolver; import org.xbib.content.xml.util.XMLUtil; -import org.xbib.oai.client.AbstractOAIResponse; +import org.xbib.oai.OAIResponse; +import org.xbib.oai.client.SplitWriter; import org.xbib.oai.exceptions.BadVerbException; import org.xbib.oai.exceptions.BadArgumentException; import org.xbib.oai.exceptions.BadResumptionTokenException; @@ -11,12 +12,13 @@ import org.xbib.oai.exceptions.OAIException; import org.xbib.oai.util.ResumptionToken; import org.xml.sax.InputSource; +import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; -import java.io.Writer; import java.time.Instant; import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; + import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -24,10 +26,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.stream.StreamResult; -/** - * - */ -public class ListRecordsResponse extends AbstractOAIResponse { +public class ListRecordsResponse implements OAIResponse { private final ListRecordsRequest request; @@ -37,8 +36,6 @@ public class ListRecordsResponse extends AbstractOAIResponse { private String error; - private Instant date; - public ListRecordsResponse(ListRecordsRequest request) { this.request = request; this.retryAfterMillis = 20L * 1000L; // 20 seconds by default @@ -57,16 +54,7 @@ public class ListRecordsResponse extends AbstractOAIResponse { return error; } - public void setDate(Instant date) { - this.date = date; - } - - public Instant getDate() { - return date; - } - - @Override - public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException { + public void receivedResponse(String message, int status, String contentType, String retryAfter, SplitWriter splitWriter) throws OAIException { if (status == 503) { long secs = retryAfterMillis / 1000; if (retryAfter != null) { @@ -105,11 +93,18 @@ public class ListRecordsResponse extends AbstractOAIResponse { this.filterreader = new ListRecordsFilterReader(request, this); if (message != null) { try { + // OAI does not know of doc streams. + // Each XML transformation runs on a single XML per call. To record the XML of subsequent list record calls, + // we use a file writer that can roll over XML docs and count the files by filename. + // So we effectively record the calls to the OAI server, not the result docs in the calls. + if (splitWriter != null) { + splitWriter.split(); + } TransformerFactory transformerFactory = TransformerFactory.newInstance(); transformerFactory.setURIResolver(new TransformerURIResolver("xsl")); Transformer transformer = transformerFactory.newTransformer(); Source source = new SAXSource(filterreader, new InputSource(new StringReader(XMLUtil.sanitize(message)))); - StreamResult streamResult = writer != null ? new StreamResult(writer) : new StreamResult(new StringWriter()); + StreamResult streamResult = new StreamResult(splitWriter != null ? splitWriter : new StringWriter()); transformer.transform(source, streamResult); if ("noRecordsMatch".equals(error)) { throw new NoRecordsMatchException("metadataPrefix=" + request.getMetadataPrefix() @@ -125,7 +120,7 @@ public class ListRecordsResponse extends AbstractOAIResponse { } else if (error != null) { throw new OAIException(error); } - } catch (TransformerException t) { + } catch (TransformerException | IOException t) { throw new OAIException(t); } } diff --git a/oai-client/src/main/java/org/xbib/oai/client/listsets/ListSetsResponse.java b/oai-client/src/main/java/org/xbib/oai/client/listsets/ListSetsResponse.java index 3470042..7a9d7b3 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/listsets/ListSetsResponse.java +++ b/oai-client/src/main/java/org/xbib/oai/client/listsets/ListSetsResponse.java @@ -1,17 +1,6 @@ package org.xbib.oai.client.listsets; -import org.xbib.oai.client.AbstractOAIResponse; -import org.xbib.oai.exceptions.OAIException; +import org.xbib.oai.OAIResponse; -import java.io.Writer; - -/** - * - */ -public class ListSetsResponse extends AbstractOAIResponse { - - @Override - public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException { - // not implemented yet - } +public class ListSetsResponse implements OAIResponse { } diff --git a/oai-client/src/test/java/org/xbib/oai/client/ArxivClientTest.java b/oai-client/src/test/java/org/xbib/oai/client/ArxivClientTest.java index c3d0445..a5ca5b7 100644 --- a/oai-client/src/test/java/org/xbib/oai/client/ArxivClientTest.java +++ b/oai-client/src/test/java/org/xbib/oai/client/ArxivClientTest.java @@ -6,8 +6,6 @@ import org.junit.jupiter.api.Test; import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.xml.SimpleMetadataHandler; -import java.io.File; -import java.io.FileWriter; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -21,7 +19,9 @@ class ArxivClientTest { @Test void testListRecordsArxivWithJdkClient() throws Exception { + SplitWriter splitWriter = new SplitWriter("build/arxiv-%d.oai", -1, 8192, false); OAIClient oaiClient = new OAIClient("http://export.arxiv.org/oai2/"); + oaiClient.setSplitWriter(splitWriter); IdentifyResponse identifyResponse = oaiClient.identify(); String granularity = identifyResponse.getGranularity(); logger.log(Level.INFO, "granularity = " + granularity); @@ -31,12 +31,9 @@ class ArxivClientTest { logger.log(Level.INFO,"waiting 20 seconds"); Thread.sleep(20 * 1000L); Handler handler = new Handler(); - File file = File.createTempFile("arxiv.", ".xml"); - file.deleteOnExit(); - FileWriter fileWriter = new FileWriter(file); oaiClient.listRecords("arXiv", null, - dateTimeFormatter, Instant.parse("2016-11-01T00:00:00Z"), Instant.parse("2016-11-02T00:00:00Z"), fileWriter, handler); - fileWriter.close(); + dateTimeFormatter, Instant.parse("2016-11-01T00:00:00Z"), Instant.parse("2016-11-02T00:00:00Z"), null, + handler, null); logger.log(Level.INFO, "count = " + handler.count()); assertTrue(handler.count() > 0L); } diff --git a/oai-client/src/test/java/org/xbib/oai/client/BundeskunsthalleTest.java b/oai-client/src/test/java/org/xbib/oai/client/BundeskunsthalleTest.java index 87db032..39b0812 100644 --- a/oai-client/src/test/java/org/xbib/oai/client/BundeskunsthalleTest.java +++ b/oai-client/src/test/java/org/xbib/oai/client/BundeskunsthalleTest.java @@ -34,7 +34,7 @@ class BundeskunsthalleTest { writer.startDocument(); writer.beginCollection(); oaiClient.listRecords("marcxml", null, - dateTimeFormatter, null, null, inputStream -> { + dateTimeFormatter, null, null, null, null, inputStream -> { try { Marc.builder() .setInputStream(inputStream) diff --git a/oai-client/src/test/java/org/xbib/oai/client/DNBClientTest.java b/oai-client/src/test/java/org/xbib/oai/client/DNBClientTest.java index e721001..daa6924 100644 --- a/oai-client/src/test/java/org/xbib/oai/client/DNBClientTest.java +++ b/oai-client/src/test/java/org/xbib/oai/client/DNBClientTest.java @@ -5,8 +5,6 @@ import org.junit.jupiter.api.Test; import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.xml.SimpleMetadataHandler; -import java.io.File; -import java.io.FileWriter; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -19,7 +17,20 @@ class DNBClientTest { private static final Logger logger = Logger.getLogger(DNBClientTest.class.getName()); @Test - void testBibdat() throws Exception { + void testBibdatFileDump() throws Exception { + OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository"); + Instant from = Instant.parse("2021-02-01T00:00:00Z"); + Instant until = Instant.parse("2021-03-01T00:00:00Z"); + Instant base = Instant.parse("2010-01-01T00:00:00Z"); + SplitWriter splitWriter = new SplitWriter("build/dnb-bib-pica-%d.xml", -1, 8192, false); + oaiClient.setSplitWriter(splitWriter); + oaiClient.listRecords("PicaPlus-xml", "bib", + null, from, until, base, + null, null); + } + + @Test + void testBibdatSimpleMetadataHandler() throws Exception { OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository"); IdentifyResponse identifyResponse = oaiClient.identify(); String granularity = identifyResponse.getGranularity(); @@ -27,12 +38,9 @@ class DNBClientTest { DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ? DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("UTC")) : null; Handler handler = new Handler(); - File file = new File("build/dnb-bib-pica.xml"); - try (FileWriter fileWriter = new FileWriter(file)) { oaiClient.listRecords("PicaPlus-xml", "bib", - dateTimeFormatter, Instant.parse("2016-01-01T00:00:00Z"), Instant.parse("2016-01-10T00:00:00Z"), - fileWriter, handler); - } + dateTimeFormatter, Instant.parse("2016-01-01T00:00:00Z"), Instant.parse("2016-01-10T00:00:00Z"), null, + handler, null); logger.log(Level.INFO, "count=" + handler.count()); assertTrue(handler.count() > 0); } @@ -43,12 +51,12 @@ class DNBClientTest { @Override public void startDocument() { - logger.log(Level.FINE, "start doc"); + logger.log(Level.INFO, "start doc"); } @Override public void endDocument() { - logger.log(Level.FINE, "end doc"); + logger.log(Level.INFO, "end doc"); count.incrementAndGet(); } diff --git a/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java b/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java index 67d5fed..f4bdb2a 100644 --- a/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java +++ b/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java @@ -4,9 +4,6 @@ import org.junit.jupiter.api.Test; import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.xml.SimpleMetadataHandler; -import java.io.Writer; -import java.nio.file.Files; -import java.nio.file.Paths; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -29,11 +26,11 @@ class DOAJClientTest { DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ? DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null; Handler handler = new Handler(); - try (Writer writer = Files.newBufferedWriter(Paths.get("build/doaj.xml"))) { - oaiClient.listRecords("oai_dc", null, - dateTimeFormatter,Instant.parse("2021-01-01T00:00:00Z"), Instant.parse("2021-02-01T00:00:00Z"), - writer, handler); - } + SplitWriter splitWriter = new SplitWriter("build/doaj-%d.xml", -1, 8192, false); + oaiClient.setSplitWriter(splitWriter); + oaiClient.listRecords("oai_dc", null, + dateTimeFormatter,Instant.parse("2021-01-01T00:00:00Z"), Instant.parse("2021-02-01T00:00:00Z"), null, + handler, null); logger.log(Level.INFO, "count = " + handler.count()); assertTrue(handler.count() > 0); }