allow value transforming in JSON writer, add buffer size
This commit is contained in:
parent
ffcb74abcf
commit
09e8bedebe
7 changed files with 180 additions and 91 deletions
|
@ -1,3 +1,3 @@
|
|||
group = org.xbib
|
||||
version = 1.0.2
|
||||
version = 1.0.3
|
||||
org.gradle.daemon = true
|
||||
|
|
|
@ -21,10 +21,12 @@ import org.xbib.marc.MarcField;
|
|||
import org.xbib.marc.MarcListener;
|
||||
import org.xbib.marc.MarcRecord;
|
||||
import org.xbib.marc.label.RecordLabel;
|
||||
import org.xbib.marc.transformer.value.MarcValueTransformers;
|
||||
import org.xbib.marc.xml.MarcContentHandler;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.Closeable;
|
||||
import java.io.FileWriter;
|
||||
import java.io.Flushable;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
@ -32,8 +34,6 @@ import java.io.OutputStreamWriter;
|
|||
import java.io.UncheckedIOException;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -42,6 +42,8 @@ import java.util.concurrent.locks.Lock;
|
|||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* This Marc Writer is a MarcContentHandler that writes Marc events to JSON.
|
||||
|
@ -50,6 +52,8 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
|
||||
private static final Logger logger = Logger.getLogger(MarcJsonWriter.class.getName());
|
||||
|
||||
private static final int DEFAULT_BUFFER_SIZE = 8192;
|
||||
|
||||
public static final String LEADER_TAG = "_LEADER";
|
||||
|
||||
public static final String FORMAT_TAG = "_FORMAT";
|
||||
|
@ -76,6 +80,8 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
|
||||
private int splitlimit;
|
||||
|
||||
private int bufferSize;
|
||||
|
||||
/**
|
||||
* Flag for indicating if writer is at top of file.
|
||||
*/
|
||||
|
@ -86,15 +92,20 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
}
|
||||
|
||||
public MarcJsonWriter(OutputStream out, boolean jsonlines) throws IOException {
|
||||
this(new OutputStreamWriter(out, StandardCharsets.UTF_8), jsonlines);
|
||||
this(out, DEFAULT_BUFFER_SIZE, jsonlines);
|
||||
}
|
||||
|
||||
public MarcJsonWriter(OutputStream out, int bufferSize, boolean jsonlines) throws IOException {
|
||||
this(new OutputStreamWriter(out, StandardCharsets.UTF_8), bufferSize, jsonlines);
|
||||
}
|
||||
|
||||
public MarcJsonWriter(Writer writer) throws IOException {
|
||||
this(writer, false);
|
||||
this(writer, DEFAULT_BUFFER_SIZE, false);
|
||||
}
|
||||
|
||||
public MarcJsonWriter(Writer writer, boolean jsonlines) throws IOException {
|
||||
this.writer = new BufferedWriter(writer);
|
||||
public MarcJsonWriter(Writer writer, int bufferSize, boolean jsonlines) throws IOException {
|
||||
this.writer = new BufferedWriter(writer, bufferSize);
|
||||
this.bufferSize = bufferSize;
|
||||
this.jsonlines = jsonlines;
|
||||
this.lock = new ReentrantLock();
|
||||
this.sb = new StringBuilder();
|
||||
|
@ -103,10 +114,15 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
}
|
||||
|
||||
public MarcJsonWriter(String fileNamePattern, int splitlimit) throws IOException {
|
||||
this(fileNamePattern, DEFAULT_BUFFER_SIZE, splitlimit);
|
||||
}
|
||||
|
||||
public MarcJsonWriter(String fileNamePattern, int bufferSize, int splitlimit) throws IOException {
|
||||
this.fileNameCounter = new AtomicInteger(0);
|
||||
this.fileNamePattern = fileNamePattern;
|
||||
this.splitlimit = splitlimit;
|
||||
this.writer = newWriter(fileNamePattern, fileNameCounter);
|
||||
this.writer = newWriter(fileNamePattern, fileNameCounter, bufferSize);
|
||||
this.bufferSize = bufferSize;
|
||||
this.lock = new ReentrantLock();
|
||||
this.sb = new StringBuilder();
|
||||
this.builder = Marc.builder();
|
||||
|
@ -114,10 +130,6 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
this.jsonlines = true;
|
||||
}
|
||||
|
||||
private static String escape(String value) {
|
||||
return value != null ? value.replaceAll("\"", "\\\"") : null;
|
||||
}
|
||||
|
||||
public MarcJsonWriter setFatalErrors(boolean fatalErrors) {
|
||||
this.fatalErrors = fatalErrors;
|
||||
return this;
|
||||
|
@ -129,6 +141,11 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
return this;
|
||||
}
|
||||
|
||||
public MarcJsonWriter setMarcValueTransformers(MarcValueTransformers marcValueTransformers) {
|
||||
super.setMarcValueTransformers(marcValueTransformers);
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MarcJsonWriter setFormat(String format) {
|
||||
super.setFormat(format);
|
||||
|
@ -171,7 +188,11 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
@Override
|
||||
public void field(MarcField field) {
|
||||
super.field(field);
|
||||
builder.addField(field);
|
||||
MarcField marcField = field;
|
||||
if (marcValueTransformers != null) {
|
||||
marcField = marcValueTransformers.transformValue(field);
|
||||
}
|
||||
builder.addField(marcField);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -387,7 +408,7 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
try {
|
||||
endCollection();
|
||||
close();
|
||||
writer = newWriter(fileNamePattern, fileNameCounter);
|
||||
writer = newWriter(fileNamePattern, fileNameCounter, bufferSize);
|
||||
top = true;
|
||||
beginCollection();
|
||||
} catch (IOException e) {
|
||||
|
@ -397,8 +418,18 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
|||
}
|
||||
}
|
||||
|
||||
private static BufferedWriter newWriter(String fileNamePattern, AtomicInteger fileNameCounter) throws IOException {
|
||||
return Files.newBufferedWriter(Paths.get(String.format(fileNamePattern, fileNameCounter.getAndIncrement())));
|
||||
private static BufferedWriter newWriter(String fileNamePattern, AtomicInteger fileNameCounter, int bufferSize)
|
||||
throws IOException {
|
||||
String s = String.format(fileNamePattern, fileNameCounter.getAndIncrement());
|
||||
return new BufferedWriter(new FileWriter(s), bufferSize);
|
||||
}
|
||||
|
||||
private static final Pattern p = Pattern.compile("\"", Pattern.LITERAL);
|
||||
|
||||
private static final String replacement = "\\\"";
|
||||
|
||||
private static String escape(String value) {
|
||||
return p.matcher(value).replaceAll(Matcher.quoteReplacement(replacement));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -58,6 +58,9 @@ public class MarcValueTransformers {
|
|||
if (transformer != null) {
|
||||
MarcField.Builder builder = MarcField.builder();
|
||||
builder.tag(field.getTag()).indicator(field.getIndicator());
|
||||
if (field.getValue() != null) {
|
||||
builder.value(transformer.transform(field.getValue()));
|
||||
}
|
||||
field.getSubfields().forEach(subfield ->
|
||||
builder.subfield(subfield.getId(), transformer.transform(subfield.getValue())));
|
||||
return builder.build();
|
||||
|
|
|
@ -179,77 +179,6 @@ public class MarcTest extends Assert {
|
|||
recordIDs.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* ZDB MARC Bibliographic.
|
||||
*/
|
||||
@Test
|
||||
public void testZDBBib() throws Exception {
|
||||
String s = "zdbtitutf8.mrc";
|
||||
InputStream in = getClass().getResource(s).openStream();
|
||||
File file = File.createTempFile(s + ".", ".xml");
|
||||
file.deleteOnExit();
|
||||
FileOutputStream out = new FileOutputStream(file);
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||
try (MarcXchangeWriter writer = new MarcXchangeWriter(out)
|
||||
.setMarcValueTransformers(marcValueTransformers)) {
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.setMarcListener(writer)
|
||||
.build()
|
||||
.writeCollection();
|
||||
assertNull(writer.getException());
|
||||
}
|
||||
assertThat(file, CompareMatcher.isIdenticalTo(getClass().getResource(s + ".xml").openStream()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testZDBStream() throws IOException {
|
||||
String s = "zdblokutf8.mrc";
|
||||
InputStream in = getClass().getResource(s).openStream();
|
||||
long count = Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.build().iso2709Stream().chunks().count();
|
||||
in.close();
|
||||
assertEquals(10170L, count);
|
||||
|
||||
in = getClass().getResource(s).openStream();
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.build().iso2709Stream().chunks()
|
||||
.forEach(chunk -> assertTrue(chunk.data().length() >= 0));
|
||||
in.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* ZDB MARC Holdings.
|
||||
*/
|
||||
@Test
|
||||
public void testZDBLok() throws Exception {
|
||||
String s = "zdblokutf8.mrc";
|
||||
InputStream in = getClass().getResource(s).openStream();
|
||||
File file = File.createTempFile(s + ".", ".xml");
|
||||
file.deleteOnExit();
|
||||
FileOutputStream out = new FileOutputStream(file);
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||
try (MarcXchangeWriter writer = new MarcXchangeWriter(out)
|
||||
.setMarcValueTransformers(marcValueTransformers)) {
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.setMarcListener(writer)
|
||||
.build()
|
||||
.writeCollection();
|
||||
assertNull(writer.getException());
|
||||
}
|
||||
assertThat(file, CompareMatcher.isIdenticalTo(getClass().getResource(s + ".xml").openStream()));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* There may be faulty input streams that contain information separators in the wrong place.
|
||||
* For the problem, see {@code org.marc4j.test.PermissiveReaderTest#testCyrillicEFix()}.
|
||||
|
|
|
@ -16,13 +16,26 @@
|
|||
*/
|
||||
package org.xbib.marc;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.xbib.helper.StreamMatcher.assertStream;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.xbib.marc.json.MarcJsonWriter;
|
||||
import org.xbib.marc.transformer.value.MarcValueTransformers;
|
||||
import org.xbib.marc.xml.MarcXchangeWriter;
|
||||
import org.xmlunit.matchers.CompareMatcher;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.Normalizer;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -99,4 +112,72 @@ public class ZDBTest {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* ZDB MARC Bibliographic.
|
||||
*/
|
||||
|
||||
@Test
|
||||
public void testZDBBib() throws Exception {
|
||||
String s = "zdbtitutf8.mrc";
|
||||
InputStream in = getClass().getResource(s).openStream();
|
||||
File file = File.createTempFile(s, ".json");
|
||||
file.deleteOnExit();
|
||||
OutputStream out = new FileOutputStream(file);
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||
try (MarcJsonWriter writer = new MarcJsonWriter(out, true)
|
||||
.setFormat(MarcXchangeConstants.MARCXCHANGE_FORMAT)
|
||||
.setType(MarcXchangeConstants.BIBLIOGRAPHIC_TYPE)
|
||||
.setMarcValueTransformers(marcValueTransformers)) {
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setMarcListener(writer)
|
||||
.build()
|
||||
.writeCollection();
|
||||
assertNull(writer.getException());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testZDBStream() throws IOException {
|
||||
String s = "zdblokutf8.mrc";
|
||||
InputStream in = getClass().getResource(s).openStream();
|
||||
long count = Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.build().iso2709Stream().chunks().count();
|
||||
in.close();
|
||||
assertEquals(10170L, count);
|
||||
|
||||
in = getClass().getResource(s).openStream();
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.build().iso2709Stream().chunks()
|
||||
.forEach(chunk -> assertTrue(chunk.data().length() >= 0));
|
||||
in.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testZDBLok() throws Exception {
|
||||
String s = "zdblokutf8.mrc";
|
||||
InputStream in = getClass().getResource(s).openStream();
|
||||
File file = File.createTempFile(s + ".", ".xml");
|
||||
file.deleteOnExit();
|
||||
FileOutputStream out = new FileOutputStream(file);
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||
try (MarcXchangeWriter writer = new MarcXchangeWriter(out)
|
||||
.setMarcValueTransformers(marcValueTransformers)) {
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.setMarcListener(writer)
|
||||
.build()
|
||||
.writeCollection();
|
||||
assertNull(writer.getException());
|
||||
}
|
||||
assertThat(file, CompareMatcher.isIdenticalTo(getClass().getResource(s + ".xml").openStream()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -178,13 +178,13 @@ public class MarcJsonWriterTest {
|
|||
.writeCollection();
|
||||
assertEquals(10, writer.getRecordCounter());
|
||||
File f0 = new File("build/0.json");
|
||||
assertTrue(f0.exists() && f0.length() == 6022);
|
||||
assertTrue(f0.exists() && f0.length() == 6015);
|
||||
File f1 = new File("build/1.json");
|
||||
assertTrue(f1.exists() && f1.length() == 7150);
|
||||
assertTrue(f1.exists() && f1.length() == 7127);
|
||||
File f2 = new File("build/2.json");
|
||||
assertTrue(f2.exists() && f2.length() == 6424);
|
||||
assertTrue(f2.exists() && f2.length() == 6426);
|
||||
File f3 = new File("build/3.json");
|
||||
assertTrue(f3.exists() && f3.length() == 2114);
|
||||
assertTrue(f3.exists() && f3.length() == 2110);
|
||||
File f4 = new File("build/4.json");
|
||||
assertFalse(f4.exists());
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
package org.xbib.marc.transformer;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.xbib.marc.MarcField;
|
||||
import org.xbib.marc.transformer.value.MarcValueTransformer;
|
||||
import org.xbib.marc.transformer.value.MarcValueTransformers;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class MarcValueTransformerTest {
|
||||
|
||||
@Test
|
||||
public void testValueTransformer() {
|
||||
MarcValueTransformer marcValueTransformer = new MarcValueTransformer() {
|
||||
@Override
|
||||
public String transform(String value) {
|
||||
return value.equals("World") ? "Earth" : value;
|
||||
}
|
||||
};
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
marcValueTransformers.setMarcValueTransformer(marcValueTransformer);
|
||||
MarcField a = MarcField.builder().tag("100").subfield("a", "Hello").subfield("b", "World").build();
|
||||
MarcField b = marcValueTransformers.transformValue(a);
|
||||
assertEquals("100$$ab[a=Hello, b=Earth]", b.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValueControlFieldTransformer() {
|
||||
MarcValueTransformer marcValueTransformer = new MarcValueTransformer() {
|
||||
@Override
|
||||
public String transform(String value) {
|
||||
return value.equals("World") ? "Earth" : value;
|
||||
}
|
||||
};
|
||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||
marcValueTransformers.setMarcValueTransformer(marcValueTransformer);
|
||||
MarcField a = MarcField.builder().tag("001").value("World").build();
|
||||
MarcField b = marcValueTransformers.transformValue(a);
|
||||
assertEquals("001$$Earth", b.toString());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in a new issue