improve Javadoc

This commit is contained in:
Jörg Prante 2016-09-28 12:03:13 +02:00
parent 080333dc16
commit 1df7b07410
3 changed files with 87 additions and 17 deletions

View file

@ -505,7 +505,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
}
/**
*
* A GZIP output stream, modified for best compression.
*/
private static class CompressedOutputStream extends GZIPOutputStream {

View file

@ -19,8 +19,10 @@ package org.xbib.marc.xml;
import org.xbib.marc.MarcField;
import org.xbib.marc.MarcListener;
import org.xbib.marc.MarcRecord;
import org.xbib.marc.json.MarcJsonWriter;
import org.xbib.marc.transformer.value.MarcValueTransformers;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
@ -31,6 +33,7 @@ import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
@ -41,6 +44,8 @@ import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.Deflater;
import java.util.zip.GZIPOutputStream;
import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
@ -60,6 +65,8 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
private static final Logger logger = Logger.getLogger(MarcXchangeWriter.class.getName());
private static final int DEFAULT_BUFFER_SIZE = 65536;
private static final String NAMESPACE_URI = MARCXCHANGE_V2_NS_URI;
private static final String NAMESPACE_SCHEMA_LOCATION = MARCXCHANGE_V2_0_SCHEMA_LOCATION;
@ -110,6 +117,10 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
private int splitlimit;
private int bufferSize;
private boolean compress;
/**
* Create a MarcXchange writer on an underlying output stream.
* @param out the underlying output stream
@ -147,6 +158,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
public MarcXchangeWriter(Writer writer, boolean indent) throws IOException {
this.writer = writer;
this.indent = indent;
this.bufferSize = DEFAULT_BUFFER_SIZE;
this.lock = new ReentrantLock();
this.documentStarted = false;
this.collectionStarted = false;
@ -157,22 +169,27 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
/**
* Create a MarcXchange writer in "split writer" mode.
* @param indent if true, indent MarcXchange output
* @param fileNamePattern file name pattern
* @param splitlimit split write limit
* @param bufferSize buffer size
* @param compress if true, compress MarcXchange output
* @param indent if true, indent MarcXchange output
* @throws IOException if the writer cannot be created
*/
public MarcXchangeWriter(boolean indent, String fileNamePattern, int splitlimit) throws IOException {
public MarcXchangeWriter(String fileNamePattern, int splitlimit, int bufferSize, boolean compress, boolean indent)
throws IOException {
this.fileNameCounter = new AtomicInteger(0);
this.fileNamePattern = fileNamePattern;
this.splitlimit = splitlimit;
this.lock = new ReentrantLock();
this.writer = newWriter(fileNamePattern, fileNameCounter);
this.bufferSize = bufferSize;
this.compress = compress;
this.indent = indent;
this.lock = new ReentrantLock();
this.documentStarted = false;
this.collectionStarted = false;
this.eventFactory = XMLEventFactory.newInstance();
this.namespace = eventFactory.createNamespace("", NAMESPACE_URI);
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
setupEventConsumer(writer, indent);
}
@ -473,7 +490,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
try {
endCollection();
writer.close();
writer = newWriter(fileNamePattern, fileNameCounter);
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
setupEventConsumer(writer, indent);
beginCollection();
} catch (IOException e) {
@ -483,8 +500,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
}
}
private static Writer newWriter(String fileNamePattern, AtomicInteger fileNameCounter) throws IOException {
return Files.newBufferedWriter(Paths.get(String.format(fileNamePattern, fileNameCounter.getAndIncrement())));
/**
 * Open the next split output file and install a UTF-8 writer on it in the {@code writer} field.
 *
 * The file name is produced by formatting {@code fileNamePattern} with the current counter
 * value; the counter is incremented afterwards. The file is created if missing and truncated
 * if it already exists.
 *
 * @param fileNamePattern format pattern for the file name, receives the counter value
 * @param fileNameCounter counter providing the number for the next file
 * @param bufferSize buffer size passed to the compressing or buffering stream
 * @param compress if true, wrap the file in a {@code CompressedOutputStream},
 *                 otherwise in a {@code BufferedOutputStream}
 * @throws IOException if the file can not be opened or the stream can not be set up
 */
private void newWriter(String fileNamePattern, AtomicInteger fileNameCounter,
                       int bufferSize, boolean compress)
        throws IOException {
    String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement());
    OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE,
            StandardOpenOption.TRUNCATE_EXISTING);
    try {
        OutputStream sink = compress
                ? new CompressedOutputStream(out, bufferSize)
                : new BufferedOutputStream(out, bufferSize);
        writer = new OutputStreamWriter(sink, StandardCharsets.UTF_8);
    } catch (IOException | RuntimeException e) {
        // Wrapping failed (e.g. non-positive buffer size, or the GZIP header could not be
        // written): release the file handle that was already opened, then rethrow.
        try {
            out.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
private void setupEventConsumer(Writer writer, boolean indent) throws IOException {
@ -514,4 +538,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
throw new UncheckedIOException(e);
}
}
/**
 * GZIP output stream whose deflater is set to {@link Deflater#BEST_COMPRESSION}.
 */
private static class CompressedOutputStream extends GZIPOutputStream {

    /**
     * Wrap a stream for GZIP output at the best compression level.
     *
     * @param sink the stream receiving the compressed bytes
     * @param bufferSize the output buffer size handed to the GZIP stream
     * @throws IOException if the GZIP stream can not be set up
     */
    CompressedOutputStream(OutputStream sink, int bufferSize) throws IOException {
        // third argument: syncFlush enabled
        super(sink, bufferSize, true);
        this.def.setLevel(Deflater.BEST_COMPRESSION);
    }
}
}

View file

@ -23,9 +23,11 @@ import org.xbib.marc.xml.MarcXchangeWriter;
import org.xmlunit.matchers.CompareMatcher;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.text.Normalizer;
import java.util.zip.GZIPInputStream;
/**
*
@ -33,19 +35,21 @@ import java.text.Normalizer;
public class MarcXchangeWriterTest extends Assert {
@Test
public void splitMARC() throws Exception {
public void splitMarcXchange() throws Exception {
String s = "IRMARC8.bin";
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
MarcXchangeWriter writer = new MarcXchangeWriter(true, "build/%d.xml", 3)
.setMarcValueTransformers(marcValueTransformers);
// constructor arguments: fileNamePattern, splitlimit, bufferSize, compress, indent
try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml", 3, 65536, false, true)
.setMarcValueTransformers(marcValueTransformers)) {
Marc.builder()
.setInputStream(in)
.setCharset(Charset.forName("ANSEL"))
.setMarcListener(writer)
.build()
.writeCollection();
}
File f0 = new File("build/0.xml");
assertThat(f0, CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
File f1 = new File("build/1.xml");
@ -58,4 +62,35 @@ public class MarcXchangeWriterTest extends Assert {
assertFalse(f4.exists());
}
/**
 * Write IRMARC8.bin in split mode with compression enabled and check that the files
 * build/0.xml.gz to build/3.xml.gz decompress to the reference documents 0.xml to 3.xml,
 * and that no fifth file build/4.xml.gz was created.
 *
 * @throws Exception if reading, writing or comparing fails
 */
@Test
public void splitMarcXchangeCompressed() throws Exception {
    String s = "IRMARC8.bin";
    MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
    marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
    // constructor arguments: fileNamePattern, splitlimit, bufferSize, compress, indent
    try (InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
         MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml.gz", 3, 65536, true, true)
                 .setMarcValueTransformers(marcValueTransformers)) {
        Marc.builder()
                .setInputStream(in)
                .setCharset(Charset.forName("ANSEL"))
                .setMarcListener(writer)
                .build()
                .writeCollection();
    }
    // Files 0..3 must exist and match their references; every stream is closed after use.
    for (int i = 0; i < 4; i++) {
        File f = new File("build/" + i + ".xml.gz");
        try (InputStream raw = new FileInputStream(f);
             InputStream actual = new GZIPInputStream(raw);
             InputStream expected = getClass().getResource(i + ".xml").openStream()) {
            assertThat(actual, CompareMatcher.isIdenticalTo(expected));
        }
    }
    File f4 = new File("build/4.xml.gz");
    assertFalse(f4.exists());
}
}