improve Javadoc

This commit is contained in:
Jörg Prante 2016-09-28 12:03:13 +02:00
parent 080333dc16
commit 1df7b07410
3 changed files with 87 additions and 17 deletions

View file

@ -505,7 +505,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
} }
/** /**
* * A GZIP output stream, modified for best compression.
*/ */
private static class CompressedOutputStream extends GZIPOutputStream { private static class CompressedOutputStream extends GZIPOutputStream {

View file

@ -19,8 +19,10 @@ package org.xbib.marc.xml;
import org.xbib.marc.MarcField; import org.xbib.marc.MarcField;
import org.xbib.marc.MarcListener; import org.xbib.marc.MarcListener;
import org.xbib.marc.MarcRecord; import org.xbib.marc.MarcRecord;
import org.xbib.marc.json.MarcJsonWriter;
import org.xbib.marc.transformer.value.MarcValueTransformers; import org.xbib.marc.transformer.value.MarcValueTransformers;
import java.io.BufferedOutputStream;
import java.io.Closeable; import java.io.Closeable;
import java.io.Flushable; import java.io.Flushable;
import java.io.IOException; import java.io.IOException;
@ -31,6 +33,7 @@ import java.io.Writer;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;
@ -41,6 +44,8 @@ import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.zip.Deflater;
import java.util.zip.GZIPOutputStream;
import javax.xml.XMLConstants; import javax.xml.XMLConstants;
import javax.xml.namespace.QName; import javax.xml.namespace.QName;
@ -60,6 +65,8 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
private static final Logger logger = Logger.getLogger(MarcXchangeWriter.class.getName()); private static final Logger logger = Logger.getLogger(MarcXchangeWriter.class.getName());
private static final int DEFAULT_BUFFER_SIZE = 65536;
private static final String NAMESPACE_URI = MARCXCHANGE_V2_NS_URI; private static final String NAMESPACE_URI = MARCXCHANGE_V2_NS_URI;
private static final String NAMESPACE_SCHEMA_LOCATION = MARCXCHANGE_V2_0_SCHEMA_LOCATION; private static final String NAMESPACE_SCHEMA_LOCATION = MARCXCHANGE_V2_0_SCHEMA_LOCATION;
@ -110,6 +117,10 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
private int splitlimit; private int splitlimit;
private int bufferSize;
private boolean compress;
/** /**
* Create a MarcXchange writer on an underlying output stream. * Create a MarcXchange writer on an underlying output stream.
* @param out the underlying output stream * @param out the underlying output stream
@ -147,6 +158,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
public MarcXchangeWriter(Writer writer, boolean indent) throws IOException { public MarcXchangeWriter(Writer writer, boolean indent) throws IOException {
this.writer = writer; this.writer = writer;
this.indent = indent; this.indent = indent;
this.bufferSize = DEFAULT_BUFFER_SIZE;
this.lock = new ReentrantLock(); this.lock = new ReentrantLock();
this.documentStarted = false; this.documentStarted = false;
this.collectionStarted = false; this.collectionStarted = false;
@ -157,22 +169,27 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
/** /**
* Create a MarcXchange writer in "split writer" mode. * Create a MarcXchange writer in "split writer" mode.
* @param indent if true, indent MarcXchange output
* @param fileNamePattern file name pattern * @param fileNamePattern file name pattern
* @param splitlimit split write limit * @param splitlimit split write limit
* @param bufferSize buffer size
* @param compress if true, compress MarcXchange output
* @param indent if true, indent MarcXchange output
* @throws IOException if writer can not be created * @throws IOException if writer can not be created
*/ */
public MarcXchangeWriter(boolean indent, String fileNamePattern, int splitlimit) throws IOException { public MarcXchangeWriter(String fileNamePattern, int splitlimit, int bufferSize, boolean compress, boolean indent)
throws IOException {
this.fileNameCounter = new AtomicInteger(0); this.fileNameCounter = new AtomicInteger(0);
this.fileNamePattern = fileNamePattern; this.fileNamePattern = fileNamePattern;
this.splitlimit = splitlimit; this.splitlimit = splitlimit;
this.lock = new ReentrantLock(); this.bufferSize = bufferSize;
this.writer = newWriter(fileNamePattern, fileNameCounter); this.compress = compress;
this.indent = indent; this.indent = indent;
this.lock = new ReentrantLock();
this.documentStarted = false; this.documentStarted = false;
this.collectionStarted = false; this.collectionStarted = false;
this.eventFactory = XMLEventFactory.newInstance(); this.eventFactory = XMLEventFactory.newInstance();
this.namespace = eventFactory.createNamespace("", NAMESPACE_URI); this.namespace = eventFactory.createNamespace("", NAMESPACE_URI);
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
setupEventConsumer(writer, indent); setupEventConsumer(writer, indent);
} }
@ -473,7 +490,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
try { try {
endCollection(); endCollection();
writer.close(); writer.close();
writer = newWriter(fileNamePattern, fileNameCounter); newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
setupEventConsumer(writer, indent); setupEventConsumer(writer, indent);
beginCollection(); beginCollection();
} catch (IOException e) { } catch (IOException e) {
@ -483,8 +500,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
} }
} }
private static Writer newWriter(String fileNamePattern, AtomicInteger fileNameCounter) throws IOException { private void newWriter(String fileNamePattern, AtomicInteger fileNameCounter,
return Files.newBufferedWriter(Paths.get(String.format(fileNamePattern, fileNameCounter.getAndIncrement()))); int bufferSize, boolean compress)
throws IOException {
String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement());
OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE,
StandardOpenOption.TRUNCATE_EXISTING);
writer = new OutputStreamWriter(compress ?
new CompressedOutputStream(out, bufferSize) :
new BufferedOutputStream(out, bufferSize), StandardCharsets.UTF_8);
} }
private void setupEventConsumer(Writer writer, boolean indent) throws IOException { private void setupEventConsumer(Writer writer, boolean indent) throws IOException {
@ -514,4 +538,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
throw new UncheckedIOException(e); throw new UncheckedIOException(e);
} }
} }
/**
* A GZIP output stream, modified for best compression.
*
* Extends {@link GZIPOutputStream} only to raise the compression level of the
* inherited deflater to {@link Deflater#BEST_COMPRESSION}.
*/
private static class CompressedOutputStream extends GZIPOutputStream {
/**
* Create a compressing stream on top of an underlying output stream.
* @param out the underlying output stream
* @param size the buffer size handed to the GZIP output stream
* @throws IOException if the GZIP output stream can not be created
*/
CompressedOutputStream(OutputStream out, int size) throws IOException {
// the third argument is the syncFlush flag of the GZIPOutputStream constructor
super(out, size, true);
// def is the protected Deflater field inherited from DeflaterOutputStream
def.setLevel(Deflater.BEST_COMPRESSION);
}
}
} }

View file

@ -23,9 +23,11 @@ import org.xbib.marc.xml.MarcXchangeWriter;
import org.xmlunit.matchers.CompareMatcher; import org.xmlunit.matchers.CompareMatcher;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.zip.GZIPInputStream;
/** /**
* *
@ -33,19 +35,21 @@ import java.text.Normalizer;
public class MarcXchangeWriterTest extends Assert { public class MarcXchangeWriterTest extends Assert {
@Test @Test
public void splitMARC() throws Exception { public void splitMarcXchange() throws Exception {
String s = "IRMARC8.bin"; String s = "IRMARC8.bin";
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream(); InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
MarcValueTransformers marcValueTransformers = new MarcValueTransformers(); MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC)); marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
MarcXchangeWriter writer = new MarcXchangeWriter(true, "build/%d.xml", 3) // fileNamePattern, splitSize, bufferSize, compress, indent
.setMarcValueTransformers(marcValueTransformers); try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml", 3, 65536, false, true)
.setMarcValueTransformers(marcValueTransformers)) {
Marc.builder() Marc.builder()
.setInputStream(in) .setInputStream(in)
.setCharset(Charset.forName("ANSEL")) .setCharset(Charset.forName("ANSEL"))
.setMarcListener(writer) .setMarcListener(writer)
.build() .build()
.writeCollection(); .writeCollection();
}
File f0 = new File("build/0.xml"); File f0 = new File("build/0.xml");
assertThat(f0, CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream())); assertThat(f0, CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
File f1 = new File("build/1.xml"); File f1 = new File("build/1.xml");
@ -58,4 +62,35 @@ public class MarcXchangeWriterTest extends Assert {
assertFalse(f4.exists()); assertFalse(f4.exists());
} }
/**
* Write the records of IRMARC8.bin through a MarcXchange writer in split writer mode
* with compression enabled, then check that each gzipped output file decompresses
* to XML identical to the expected resources 0.xml to 3.xml, and that no fifth
* file was created.
* @throws Exception if reading the input, writing or comparing the output fails
*/
@Test
public void splitMarcXchangeCompressed() throws Exception {
String s = "IRMARC8.bin";
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
// normalize every field value to Unicode NFC before it is written
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
// fileNamePattern, splitSize, bufferSize, compress, indent
try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml.gz", 3, 65536, true, true)
.setMarcValueTransformers(marcValueTransformers)) {
Marc.builder()
.setInputStream(in)
.setCharset(Charset.forName("ANSEL"))
.setMarcListener(writer)
.build()
.writeCollection();
}
// the writer has been closed by try-with-resources before the files are read back
File f0 = new File("build/0.xml.gz");
assertThat(new GZIPInputStream(new FileInputStream(f0)),
CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
File f1 = new File("build/1.xml.gz");
assertThat(new GZIPInputStream(new FileInputStream(f1)),
CompareMatcher.isIdenticalTo(getClass().getResource("1.xml").openStream()));
File f2 = new File("build/2.xml.gz");
assertThat(new GZIPInputStream(new FileInputStream(f2)),
CompareMatcher.isIdenticalTo(getClass().getResource("2.xml").openStream()));
File f3 = new File("build/3.xml.gz");
assertThat(new GZIPInputStream(new FileInputStream(f3)),
CompareMatcher.isIdenticalTo(getClass().getResource("3.xml").openStream()));
// exactly four files (0 to 3) are expected, so a file with index 4 must not exist
File f4 = new File("build/4.xml.gz");
assertFalse(f4.exists());
}
} }