improve Javadoc
This commit is contained in:
parent
080333dc16
commit
1df7b07410
3 changed files with 87 additions and 17 deletions
|
@ -505,7 +505,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* A GZIP output stream, modified for best compression.
|
||||||
*/
|
*/
|
||||||
private static class CompressedOutputStream extends GZIPOutputStream {
|
private static class CompressedOutputStream extends GZIPOutputStream {
|
||||||
|
|
||||||
|
|
|
@ -19,8 +19,10 @@ package org.xbib.marc.xml;
|
||||||
import org.xbib.marc.MarcField;
|
import org.xbib.marc.MarcField;
|
||||||
import org.xbib.marc.MarcListener;
|
import org.xbib.marc.MarcListener;
|
||||||
import org.xbib.marc.MarcRecord;
|
import org.xbib.marc.MarcRecord;
|
||||||
|
import org.xbib.marc.json.MarcJsonWriter;
|
||||||
import org.xbib.marc.transformer.value.MarcValueTransformers;
|
import org.xbib.marc.transformer.value.MarcValueTransformers;
|
||||||
|
|
||||||
|
import java.io.BufferedOutputStream;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.Flushable;
|
import java.io.Flushable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -31,6 +33,7 @@ import java.io.Writer;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
@ -41,6 +44,8 @@ import java.util.concurrent.locks.Lock;
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import java.util.logging.Logger;
|
import java.util.logging.Logger;
|
||||||
|
import java.util.zip.Deflater;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
import javax.xml.XMLConstants;
|
import javax.xml.XMLConstants;
|
||||||
import javax.xml.namespace.QName;
|
import javax.xml.namespace.QName;
|
||||||
|
@ -60,6 +65,8 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(MarcXchangeWriter.class.getName());
|
private static final Logger logger = Logger.getLogger(MarcXchangeWriter.class.getName());
|
||||||
|
|
||||||
|
private static final int DEFAULT_BUFFER_SIZE = 65536;
|
||||||
|
|
||||||
private static final String NAMESPACE_URI = MARCXCHANGE_V2_NS_URI;
|
private static final String NAMESPACE_URI = MARCXCHANGE_V2_NS_URI;
|
||||||
|
|
||||||
private static final String NAMESPACE_SCHEMA_LOCATION = MARCXCHANGE_V2_0_SCHEMA_LOCATION;
|
private static final String NAMESPACE_SCHEMA_LOCATION = MARCXCHANGE_V2_0_SCHEMA_LOCATION;
|
||||||
|
@ -110,6 +117,10 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
||||||
|
|
||||||
private int splitlimit;
|
private int splitlimit;
|
||||||
|
|
||||||
|
private int bufferSize;
|
||||||
|
|
||||||
|
private boolean compress;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a MarcXchange writer on an underlying output stream.
|
* Create a MarcXchange writer on an underlying output stream.
|
||||||
* @param out the underlying output stream
|
* @param out the underlying output stream
|
||||||
|
@ -147,6 +158,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
||||||
public MarcXchangeWriter(Writer writer, boolean indent) throws IOException {
|
public MarcXchangeWriter(Writer writer, boolean indent) throws IOException {
|
||||||
this.writer = writer;
|
this.writer = writer;
|
||||||
this.indent = indent;
|
this.indent = indent;
|
||||||
|
this.bufferSize = DEFAULT_BUFFER_SIZE;
|
||||||
this.lock = new ReentrantLock();
|
this.lock = new ReentrantLock();
|
||||||
this.documentStarted = false;
|
this.documentStarted = false;
|
||||||
this.collectionStarted = false;
|
this.collectionStarted = false;
|
||||||
|
@ -157,22 +169,27 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a MarcXchange writer in "split writer" mode.
|
* Create a MarcXchange writer in "split writer" mode.
|
||||||
* @param indent if true, indent MarcXchange output
|
|
||||||
* @param fileNamePattern file name pattern
|
* @param fileNamePattern file name pattern
|
||||||
* @param splitlimit split write limit
|
* @param splitlimit split write limit
|
||||||
|
* @param bufferSize buffer size
|
||||||
|
* @param compress if true, compress MarcXchange output
|
||||||
|
* @param indent if true, indent MarcXchange output
|
||||||
* @throws IOException if writer can not be created
|
* @throws IOException if writer can not be created
|
||||||
*/
|
*/
|
||||||
public MarcXchangeWriter(boolean indent, String fileNamePattern, int splitlimit) throws IOException {
|
public MarcXchangeWriter(String fileNamePattern, int splitlimit, int bufferSize, boolean compress, boolean indent)
|
||||||
|
throws IOException {
|
||||||
this.fileNameCounter = new AtomicInteger(0);
|
this.fileNameCounter = new AtomicInteger(0);
|
||||||
this.fileNamePattern = fileNamePattern;
|
this.fileNamePattern = fileNamePattern;
|
||||||
this.splitlimit = splitlimit;
|
this.splitlimit = splitlimit;
|
||||||
this.lock = new ReentrantLock();
|
this.bufferSize = bufferSize;
|
||||||
this.writer = newWriter(fileNamePattern, fileNameCounter);
|
this.compress = compress;
|
||||||
this.indent = indent;
|
this.indent = indent;
|
||||||
|
this.lock = new ReentrantLock();
|
||||||
this.documentStarted = false;
|
this.documentStarted = false;
|
||||||
this.collectionStarted = false;
|
this.collectionStarted = false;
|
||||||
this.eventFactory = XMLEventFactory.newInstance();
|
this.eventFactory = XMLEventFactory.newInstance();
|
||||||
this.namespace = eventFactory.createNamespace("", NAMESPACE_URI);
|
this.namespace = eventFactory.createNamespace("", NAMESPACE_URI);
|
||||||
|
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
|
||||||
setupEventConsumer(writer, indent);
|
setupEventConsumer(writer, indent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -473,7 +490,7 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
||||||
try {
|
try {
|
||||||
endCollection();
|
endCollection();
|
||||||
writer.close();
|
writer.close();
|
||||||
writer = newWriter(fileNamePattern, fileNameCounter);
|
newWriter(fileNamePattern, fileNameCounter, bufferSize, compress);
|
||||||
setupEventConsumer(writer, indent);
|
setupEventConsumer(writer, indent);
|
||||||
beginCollection();
|
beginCollection();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -483,8 +500,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Writer newWriter(String fileNamePattern, AtomicInteger fileNameCounter) throws IOException {
|
private void newWriter(String fileNamePattern, AtomicInteger fileNameCounter,
|
||||||
return Files.newBufferedWriter(Paths.get(String.format(fileNamePattern, fileNameCounter.getAndIncrement())));
|
int bufferSize, boolean compress)
|
||||||
|
throws IOException {
|
||||||
|
String name = String.format(fileNamePattern, fileNameCounter.getAndIncrement());
|
||||||
|
OutputStream out = Files.newOutputStream(Paths.get(name), StandardOpenOption.CREATE,
|
||||||
|
StandardOpenOption.TRUNCATE_EXISTING);
|
||||||
|
writer = new OutputStreamWriter(compress ?
|
||||||
|
new CompressedOutputStream(out, bufferSize) :
|
||||||
|
new BufferedOutputStream(out, bufferSize), StandardCharsets.UTF_8);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void setupEventConsumer(Writer writer, boolean indent) throws IOException {
|
private void setupEventConsumer(Writer writer, boolean indent) throws IOException {
|
||||||
|
@ -514,4 +538,15 @@ public class MarcXchangeWriter extends MarcContentHandler implements Flushable,
|
||||||
throw new UncheckedIOException(e);
|
throw new UncheckedIOException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A GZIP output stream, modified for best compression.
|
||||||
|
*/
|
||||||
|
private static class CompressedOutputStream extends GZIPOutputStream {
|
||||||
|
|
||||||
|
CompressedOutputStream(OutputStream out, int size) throws IOException {
|
||||||
|
super(out, size, true);
|
||||||
|
def.setLevel(Deflater.BEST_COMPRESSION);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,9 +23,11 @@ import org.xbib.marc.xml.MarcXchangeWriter;
|
||||||
import org.xmlunit.matchers.CompareMatcher;
|
import org.xmlunit.matchers.CompareMatcher;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -33,19 +35,21 @@ import java.text.Normalizer;
|
||||||
public class MarcXchangeWriterTest extends Assert {
|
public class MarcXchangeWriterTest extends Assert {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void splitMARC() throws Exception {
|
public void splitMarcXchange() throws Exception {
|
||||||
String s = "IRMARC8.bin";
|
String s = "IRMARC8.bin";
|
||||||
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
|
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
|
||||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||||
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||||
MarcXchangeWriter writer = new MarcXchangeWriter(true, "build/%d.xml", 3)
|
// fileNamePattern, splitSize, bufferSize, compress, indent
|
||||||
.setMarcValueTransformers(marcValueTransformers);
|
try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml", 3, 65536, false, true)
|
||||||
Marc.builder()
|
.setMarcValueTransformers(marcValueTransformers)) {
|
||||||
.setInputStream(in)
|
Marc.builder()
|
||||||
.setCharset(Charset.forName("ANSEL"))
|
.setInputStream(in)
|
||||||
.setMarcListener(writer)
|
.setCharset(Charset.forName("ANSEL"))
|
||||||
.build()
|
.setMarcListener(writer)
|
||||||
.writeCollection();
|
.build()
|
||||||
|
.writeCollection();
|
||||||
|
}
|
||||||
File f0 = new File("build/0.xml");
|
File f0 = new File("build/0.xml");
|
||||||
assertThat(f0, CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
|
assertThat(f0, CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
|
||||||
File f1 = new File("build/1.xml");
|
File f1 = new File("build/1.xml");
|
||||||
|
@ -58,4 +62,35 @@ public class MarcXchangeWriterTest extends Assert {
|
||||||
assertFalse(f4.exists());
|
assertFalse(f4.exists());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void splitMarcXchangeCompressed() throws Exception {
|
||||||
|
String s = "IRMARC8.bin";
|
||||||
|
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
|
||||||
|
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||||
|
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||||
|
// fileNamePattern, splitSize, bufferSize, compress, indent
|
||||||
|
try (MarcXchangeWriter writer = new MarcXchangeWriter("build/%d.xml.gz", 3, 65536, true, true)
|
||||||
|
.setMarcValueTransformers(marcValueTransformers)) {
|
||||||
|
Marc.builder()
|
||||||
|
.setInputStream(in)
|
||||||
|
.setCharset(Charset.forName("ANSEL"))
|
||||||
|
.setMarcListener(writer)
|
||||||
|
.build()
|
||||||
|
.writeCollection();
|
||||||
|
}
|
||||||
|
File f0 = new File("build/0.xml.gz");
|
||||||
|
assertThat(new GZIPInputStream(new FileInputStream(f0)),
|
||||||
|
CompareMatcher.isIdenticalTo(getClass().getResource("0.xml").openStream()));
|
||||||
|
File f1 = new File("build/1.xml.gz");
|
||||||
|
assertThat(new GZIPInputStream(new FileInputStream(f1)),
|
||||||
|
CompareMatcher.isIdenticalTo(getClass().getResource("1.xml").openStream()));
|
||||||
|
File f2 = new File("build/2.xml.gz");
|
||||||
|
assertThat(new GZIPInputStream(new FileInputStream(f2)),
|
||||||
|
CompareMatcher.isIdenticalTo(getClass().getResource("2.xml").openStream()));
|
||||||
|
File f3 = new File("build/3.xml.gz");
|
||||||
|
assertThat(new GZIPInputStream(new FileInputStream(f3)),
|
||||||
|
CompareMatcher.isIdenticalTo(getClass().getResource("3.xml").openStream()));
|
||||||
|
File f4 = new File("build/4.xml.gz");
|
||||||
|
assertFalse(f4.exists());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue