improve Javadoc
This commit is contained in:
parent
080333dc16
commit
1df7b07410
3 changed files with 87 additions and 17 deletions
|
@ -505,7 +505,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* A GZIP output stream, modified for best compression.
|
||||
*/
|
||||
private static class CompressedOutputStream extends GZIPOutputStream {
|
||||
|
||||
|
|
|
@ -19,8 +19,10 @@ package org.xbib.marc.xml;
|
|||
import org.xbib.marc.MarcField;
|
||||
import org.xbib.marc.MarcListener;
|
||||
import org.xbib.marc.MarcRecord;
|
||||
import org.xbib.marc.json.MarcJsonWriter;
|
||||
import org.xbib.marc.transformer.value.MarcValueTransformers;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.Closeable;
|
||||
import java.io.Flushable;
|
||||
import java.io.IOException;
|
||||
|
@ -31,6 +33,7 @@ import java.io.Writer;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
@ -41,6 +44,8 @@ import java.util.concurrent.locks.Lock;
|
|||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.zip.Deflater;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
import javax.xml.XMLConstants;
|
||||
import javax.xml.namespace.QName;
|
||||
|
@ -60,6 +65,8 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
|||
|
||||
private static final Logger logger = Logger.getLogger(MarcXchangeWriter.class.getName());
|
||||
|
||||
private static final int DEFAULT_BUFFER_SIZE = 65536;
|
||||
|
||||
private static final String NAMESPACE_URI = MARCXCHANGE_V2_NS_URI;
|
||||
|
||||
private static final String NAMESPACE_SCHEMA_LOCATION = MARCXCHANGE_V2_0_SCHEMA_LOCATION;
|
||||
|
@ -110,6 +117,10 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
|||
|
||||
private int splitlimit;
|
||||
|
||||
private int bufferSize;
|
||||
|
||||
private boolean compress;
|
||||
|
||||
/**
|
||||
* Create a MarcXchange writer on an underlying output stream.
|
||||
* @param out the underlying output stream
|
||||
|
@ -147,6 +158,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
|||
public MarcXchangeWriter(Writer writer, boolean indent) throws IOException {
|
||||
this.writer = writer;
|
||||
this.indent = indent;
|
||||
this.bufferSize = DEFAULT_BUFFER_SIZE;
|
||||
this.lock = new ReentrantLock();
|
||||
this.documentStarted = false;
|
||||
this.collectionStarted = false;
|
||||
|
@ -157,22 +169,27 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
|||
|
||||
/**
|
||||
* Create a MarcXchange writer in "split writer" mode.
|
||||
* @param indent if true, indent MarcXchange output
|
||||
* @param fileNamePattern file name pattern
|
||||
* @param splitlimit split write limit
|
||||
* @param bufferSize buffer size
|
||||
* @param compress if true, compress MarcXchange output
|
||||
* @param indent if true, indent MarcXchange output
|
||||
* @throws IOException if the writer cannot be created
|
||||
*/
|
||||
public MarcXchangeWriter(boolean indent, String fileNamePattern, int splitlimit) throws IOException {
|
||||
public MarcXchangeWriter(String fileNamePattern, int splitlimit, int bufferSize, boolean compress, boolean indent)
|
||||
throws IOException {
|
||||
this.fileNameCounter = new AtomicInteger(0);
|
||||
this.fileNamePattern = fileNamePattern;
|
||||
this.splitlimit = splitlimit;
|
||||
this.lock = new ReentrantLock();
|
||||
this.writer = newWriter(fileNamePattern, fileNameCounter);
|
||||
this.bufferSize = bufferSize;
|
||||
this.compress = compress;
|
||||
this.indent = indent;
|
||||
this.lock = new ReentrantLock();
|
||||
this.documentStarted = false;
|
||||
this.collectionStarted = false;
|
||||
this.eventFactory = XMLEventFactory.newInstance();
|
||||
this.namespace = eventFactory.createNamespace("", NAMESPACE_URI);
|
||||
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
|
||||
setupEventConsumer(writer, indent);
|
||||
}
|
||||
|
||||
|
@ -473,7 +490,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
|||
try {
|
||||
endCollection();
|
||||
writer.close();
|
||||
writer = newWriter(fileNamePattern, fileNameCounter);
|
||||
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
|
||||
setupEventConsumer(writer, indent);
|
||||
beginCollection();
|
||||
} catch (IOException e) {
|
||||
|
@ -483,8 +500,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
|||
}
|
||||
}
|
||||
|
||||
private static Writer newWriter(String fileNamePattern, AtomicInteger fileNameCounter) throws IOException {
|
||||
return Files.newBufferedWriter(Paths.get(String.format(fileNamePattern, fileNameCounter.getAndIncrement())));
|
||||
private void newWriter(String fileNamePattern, AtomicInteger fileNameCounter,
|
||||
int bufferSize, boolean compress)
|
||||
throws IOException {
|
||||
String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement());
|
||||
OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE,
|
||||
StandardOpenOption.TRUNCATE_EXISTING);
|
||||
writer = new OutputStreamWriter(compress ?
|
||||
new CompressedOutputStream(out, bufferSize) :
|
||||
new BufferedOutputStream(out, bufferSize), StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
private void setupEventConsumer(Writer writer, boolean indent) throws IOException {
|
||||
|
@ -514,4 +538,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
|||
throw new UncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A GZIP output stream whose deflater is set to {@link Deflater#BEST_COMPRESSION}.
|
||||
*/
|
||||
private static class CompressedOutputStream extends GZIPOutputStream {
|
||||
|
||||
/**
* Create a GZIP stream on top of the given output stream.
* @param out the underlying output stream
* @param size the buffer size handed to {@link GZIPOutputStream}
* @throws IOException if the GZIP stream cannot be created
*/
CompressedOutputStream(OutputStream out, int size) throws IOException {
|
||||
// the third argument switches on sync flush mode of the GZIP stream
super(out, size, true);
|
||||
// "def" is the deflater inherited from the superclass; raise it to the highest compression level
def.setLevel(Deflater.BEST_COMPRESSION);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,9 +23,11 @@ import org.xbib.marc.xml.MarcXchangeWriter;
|
|||
import org.xmlunit.matchers.CompareMatcher;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.text.Normalizer;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -33,19 +35,21 @@ import java.text.Normalizer;
|
|||
public class MarcXchangeWriterTest extends Assert {
|
||||
|
||||
@Test
|
||||
public void splitMARC() throws Exception {
|
||||
public void splitMarcXchange() throws Exception {
|
||||
String s = "IRMARC8.bin";
|
||||
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||
MarcXchangeWriter writer = new MarcXchangeWriter(true, "build/%d.xml", 3)
|
||||
.setMarcValueTransformers(marcValueTransformers);
|
||||
// fileNamePattern, splitlimit, bufferSize, compress, indent
|
||||
try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml", 3, 65536, false, true)
|
||||
.setMarcValueTransformers(marcValueTransformers)) {
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(Charset.forName("ANSEL"))
|
||||
.setMarcListener(writer)
|
||||
.build()
|
||||
.writeCollection();
|
||||
}
|
||||
File f0 = new File("build/0.xml");
|
||||
assertThat(f0, CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
|
||||
File f1 = new File("build/1.xml");
|
||||
|
@ -58,4 +62,35 @@ public class MarcXchangeWriterTest extends Assert {
|
|||
assertFalse(f4.exists());
|
||||
}
|
||||
|
||||
/**
* Write the records of IRMARC8.bin with a split-mode writer that has compression enabled,
* then compare each decompressed split file against the expected XML resource.
* @throws Exception if reading, writing or comparing fails
*/
@Test
|
||||
public void splitMarcXchangeCompressed() throws Exception {
|
||||
String s = "IRMARC8.bin";
|
||||
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
// normalize every field value to Unicode NFC before it is written
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||
// fileNamePattern, splitlimit, bufferSize, compress, indent
|
||||
try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml.gz", 3, 65536, true, true)
|
||||
.setMarcValueTransformers(marcValueTransformers)) {
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(Charset.forName("ANSEL"))
|
||||
.setMarcListener(writer)
|
||||
.build()
|
||||
.writeCollection();
|
||||
}
|
||||
// the writer is closed here, so the split files can be read back and decompressed
File f0 = new File("build/0.xml.gz");
|
||||
assertThat(new GZIPInputStream(new FileInputStream(f0)),
|
||||
CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
|
||||
File f1 = new File("build/1.xml.gz");
|
||||
assertThat(new GZIPInputStream(new FileInputStream(f1)),
|
||||
CompareMatcher.isIdenticalTo(getClass().getResource("1.xml").openStream()));
|
||||
File f2 = new File("build/2.xml.gz");
|
||||
assertThat(new GZIPInputStream(new FileInputStream(f2)),
|
||||
CompareMatcher.isIdenticalTo(getClass().getResource("2.xml").openStream()));
|
||||
File f3 = new File("build/3.xml.gz");
|
||||
assertThat(new GZIPInputStream(new FileInputStream(f3)),
|
||||
CompareMatcher.isIdenticalTo(getClass().getResource("3.xml").openStream()));
|
||||
// files 0 to 3 are expected; a file with index 4 must not have been created
File f4 = new File("build/4.xml.gz");
|
||||
assertFalse(f4.exists());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue