From 1df7b07410cca4e15363d1ee24e04f9805acf2e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=CC=88rg=20Prante?= Date: Wed, 28 Sep 2016 12:03:13 +0200 Subject: [PATCH] improve Javadoc --- .../org/xbib/marc/json/MarcJsonWriter.java | 2 +- .../org/xbib/marc/xml/MarcXchangeWriter.java | 49 ++++++++++++++--- .../org/xbib/marc/MarcXchangeWriterTest.java | 53 +++++++++++++++---- 3 files changed, 87 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/xbib/marc/json/MarcJsonWriter.java b/src/main/java/org/xbib/marc/json/MarcJsonWriter.java index 6df7251..7e96ea3 100644 --- a/src/main/java/org/xbib/marc/json/MarcJsonWriter.java +++ b/src/main/java/org/xbib/marc/json/MarcJsonWriter.java @@ -505,7 +505,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo } /** - * + * A GZIP output stream, modified for best compression. */ private static class CompressedOutputStream extends GZIPOutputStream { diff --git a/src/main/java/org/xbib/marc/xml/MarcXchangeWriter.java b/src/main/java/org/xbib/marc/xml/MarcXchangeWriter.java index 65a9629..44d55fe 100644 --- a/src/main/java/org/xbib/marc/xml/MarcXchangeWriter.java +++ b/src/main/java/org/xbib/marc/xml/MarcXchangeWriter.java @@ -19,8 +19,10 @@ package org.xbib.marc.xml; import org.xbib.marc.MarcField; import org.xbib.marc.MarcListener; import org.xbib.marc.MarcRecord; +import org.xbib.marc.json.MarcJsonWriter; import org.xbib.marc.transformer.value.MarcValueTransformers; +import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.Flushable; import java.io.IOException; @@ -31,6 +33,7 @@ import java.io.Writer; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; @@ -41,6 +44,8 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.zip.Deflater; +import java.util.zip.GZIPOutputStream; import javax.xml.XMLConstants; import javax.xml.namespace.QName; @@ -60,6 +65,8 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable, private static final Logger logger = Logger.getLogger(MarcXchangeWriter.class.getName()); + private static final int DEFAULT_BUFFER_SIZE = 65536; + private static final String NAMESPACE_URI = MARCXCHANGE_V2_NS_URI; private static final String NAMESPACE_SCHEMA_LOCATION = MARCXCHANGE_V2_0_SCHEMA_LOCATION; @@ -110,6 +117,10 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable, private int splitlimit; + private int bufferSize; + + private boolean compress; + /** * Create a MarcXchange writer on an underlying output stream. * @param out the underlying output stream @@ -147,6 +158,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable, public MarcXchangeWriter(Writer writer, boolean indent) throws IOException { this.writer = writer; this.indent = indent; + this.bufferSize = DEFAULT_BUFFER_SIZE; this.lock = new ReentrantLock(); this.documentStarted = false; this.collectionStarted = false; @@ -157,22 +169,27 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable, /** * Create a MarcXchange writer in "split writer" mode. - * @param indent if true, indent MarcXchange output * @param fileNamePattern file name pattern * @param splitlimit split write limit + * @param bufferSize buffer size + * @param compress if true, compress MarcXchange output + * @param indent if true, indent MarcXchange output * @throws IOException if writer can not be created */ - public MarcXchangeWriter(boolean indent, String fileNamePattern, int splitlimit) throws IOException { + public MarcXchangeWriter(String fileNamePattern, int splitlimit, int bufferSize, boolean compress, boolean indent) + throws IOException { this.fileNameCounter = new AtomicInteger(0); this.fileNamePattern = fileNamePattern; this.splitlimit = splitlimit; - this.lock = new ReentrantLock(); - this.writer = newWriter(fileNamePattern, fileNameCounter); + this.bufferSize = bufferSize; + this.compress = compress; this.indent = indent; + this.lock = new ReentrantLock(); this.documentStarted = false; this.collectionStarted = false; this.eventFactory = XMLEventFactory.newInstance(); this.namespace = eventFactory.createNamespace("", NAMESPACE_URI); + newWriter(fileNamePattern, fileNameCounter, bufferSize, compress); setupEventConsumer(writer, indent); } @@ -473,7 +490,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable, try { endCollection(); writer.close(); - writer = newWriter(fileNamePattern, fileNameCounter); + newWriter(fileNamePattern, fileNameCounter, bufferSize, compress); setupEventConsumer(writer, indent); beginCollection(); } catch (IOException e) { @@ -483,8 +500,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable, } } - private static Writer newWriter(String fileNamePattern, AtomicInteger fileNameCounter) throws IOException { - return Files.newBufferedWriter(Paths.get(String.format(fileNamePattern, fileNameCounter.getAndIncrement()))); + private void newWriter(String fileNamePattern, AtomicInteger fileNameCounter, + int bufferSize, boolean compress) + throws IOException { + String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement()); + OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING); + writer = new OutputStreamWriter(compress ? + new CompressedOutputStream(out, bufferSize) : + new BufferedOutputStream(out, bufferSize), StandardCharsets.UTF_8); } private void setupEventConsumer(Writer writer, boolean indent) throws IOException { @@ -514,4 +538,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable, throw new UncheckedIOException(e); } } + + /** + * A GZIP output stream, modified for best compression. + */ + private static class CompressedOutputStream extends GZIPOutputStream { + + CompressedOutputStream(OutputStream out, int size) throws IOException { + super(out, size, true); + def.setLevel(Deflater.BEST_COMPRESSION); + } + } } diff --git a/src/test/java/org/xbib/marc/MarcXchangeWriterTest.java b/src/test/java/org/xbib/marc/MarcXchangeWriterTest.java index b207e46..a1eb8c2 100644 --- a/src/test/java/org/xbib/marc/MarcXchangeWriterTest.java +++ b/src/test/java/org/xbib/marc/MarcXchangeWriterTest.java @@ -23,9 +23,11 @@ import org.xbib.marc.xml.MarcXchangeWriter; import org.xmlunit.matchers.CompareMatcher; import java.io.File; +import java.io.FileInputStream; import java.io.InputStream; import java.nio.charset.Charset; import java.text.Normalizer; +import java.util.zip.GZIPInputStream; /** * @@ -33,19 +35,21 @@ import java.text.Normalizer; public class MarcXchangeWriterTest extends Assert { @Test - public void splitMARC() throws Exception { + public void splitMarcXchange() throws Exception { String s = "IRMARC8.bin"; InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream(); MarcValueTransformers marcValueTransformers = new MarcValueTransformers(); marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC)); - MarcXchangeWriter writer = new MarcXchangeWriter(true, "build/%d.xml", 3) - .setMarcValueTransformers(marcValueTransformers); - Marc.builder() - .setInputStream(in) - .setCharset(Charset.forName("ANSEL")) - .setMarcListener(writer) - .build() - .writeCollection(); + // fileNamePattern, splitSize, bufferSize, compress, indent + try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml", 3, 65536, false, true) + .setMarcValueTransformers(marcValueTransformers)) { + Marc.builder() + .setInputStream(in) + .setCharset(Charset.forName("ANSEL")) + .setMarcListener(writer) + .build() + .writeCollection(); + } File f0 = new File("build/0.xml"); assertThat(f0, CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream())); File f1 = new File("build/1.xml"); @@ -58,4 +62,35 @@ public class MarcXchangeWriterTest extends Assert { assertFalse(f4.exists()); } + @Test + public void splitMarcXchangeCompressed() throws Exception { + String s = "IRMARC8.bin"; + InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream(); + MarcValueTransformers marcValueTransformers = new MarcValueTransformers(); + marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC)); + // fileNamePattern, splitSize, bufferSize, compress, indent + try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml.gz", 3, 65536, true, true) + .setMarcValueTransformers(marcValueTransformers)) { + Marc.builder() + .setInputStream(in) + .setCharset(Charset.forName("ANSEL")) + .setMarcListener(writer) + .build() + .writeCollection(); + } + File f0 = new File("build/0.xml.gz"); + assertThat(new GZIPInputStream(new FileInputStream(f0)), + CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream())); + File f1 = new File("build/1.xml.gz"); + assertThat(new GZIPInputStream(new FileInputStream(f1)), + CompareMatcher.isIdenticalTo(getClass().getResource("1.xml").openStream())); + File f2 = new File("build/2.xml.gz"); + assertThat(new GZIPInputStream(new FileInputStream(f2)), + CompareMatcher.isIdenticalTo(getClass().getResource("2.xml").openStream())); + File f3 = new File("build/3.xml.gz"); + assertThat(new GZIPInputStream(new FileInputStream(f3)), + CompareMatcher.isIdenticalTo(getClass().getResource("3.xml").openStream())); + File f4 = new File("build/4.xml.gz"); + assertFalse(f4.exists()); + } }