Fixed validity check for subfield codes, renamed MarcGenerator close() to flush(), fixes for SonarQube findings, update to Gradle 4.4

This commit is contained in:
Jörg Prante 2017-12-14 14:01:26 +01:00
parent 58c2a49293
commit 7fe42850dc
19 changed files with 225 additions and 194 deletions

View file

@ -64,13 +64,15 @@ test {
}
asciidoctor {
backends 'html5'
separateOutputDirs = false
attributes 'source-highlighter': 'coderay',
toc : '',
idprefix : '',
idseparator : '-',
stylesheet: "${projectDir}/src/docs/asciidoc/css/foundation.css"
attributes toc: 'left',
doctype: 'book',
icons: 'font',
encoding: 'utf-8',
sectlink: true,
sectanchors: true,
linkattrs: true,
imagesdir: 'img',
'source-highlighter': 'coderay'
}
javadoc {

View file

@ -1,6 +1,6 @@
group = org.xbib
name = marc
version = 1.0.17
version = 1.0.18
xbib-content.version = 1.0.7
xbib-bibliographic-character-sets.version = 1.0.0

Binary file not shown.

View file

@ -1,6 +1,6 @@
#Tue Dec 05 21:14:54 CET 2017
#Thu Dec 14 12:16:56 CET 2017
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-4.3.1-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-4.4-all.zip

View file

@ -5,10 +5,8 @@ Version 1.0
:toc: preamble
:toclevels: 4
:!toc-title: Content
:experimental:
:description: MARC bibliographic data processing
:keywords: MARC, Java, bibliographic data processing
:icons: font
== Introduction

View file

@ -314,7 +314,7 @@ public final class Marc {
count++;
}
stream.close();
builder.marcGenerator.close();
builder.marcGenerator.flush();
if (withCollection) {
marcListener.endCollection();
if (marcListener instanceof ContentHandler) {
@ -384,7 +384,7 @@ public final class Marc {
l.incrementAndGet();
});
stream.close();
builder.marcGenerator.close();
builder.marcGenerator.flush();
if (withCollection) {
marcRecordListener.endCollection();
if (marcRecordListener instanceof ContentHandler) {
@ -567,7 +567,7 @@ public final class Marc {
while ((chunk = stream.readChunk()) != null) {
marcGenerator.chunk(chunk);
}
marcGenerator.close();
marcGenerator.flush();
} finally {
builder.getInputStream().close();
}
@ -1100,18 +1100,19 @@ public final class Marc {
@Override
public boolean hasNext() {
try {
setMarcRecord(null);
MarcRecord record;
record(null);
Chunk<byte[], BytesReference> chunk;
while ((chunk = stream.readChunk()) != null) {
marcGenerator.chunk(chunk);
MarcRecord marcRecord = getMarcRecord();
if (marcRecord != null) {
record = getMarcRecord();
if (record != null) {
return true;
}
}
marcGenerator.close();
MarcRecord marcRecord = getMarcRecord();
if (marcRecord != null) {
marcGenerator.flush();
record = getMarcRecord();
if (record != null) {
return true;
}
} catch (IOException e) {
@ -1122,11 +1123,11 @@ public final class Marc {
@Override
public MarcRecord next() {
MarcRecord marcRecord = getMarcRecord();
if (marcRecord == null) {
MarcRecord record = getMarcRecord();
if (record == null) {
throw new NoSuchElementException();
}
return marcRecord;
return record;
}
};
}
@ -1158,10 +1159,6 @@ public final class Marc {
return this;
}
private void setMarcRecord(MarcRecord marcRecord) {
this.marcRecord = marcRecord;
}
// Returns the record currently held in the marcRecord field.
// The value may be null; the iterator code in this class checks the result against null.
private MarcRecord getMarcRecord() {
return marcRecord;
}

View file

@ -19,8 +19,12 @@ package org.xbib.marc;
import org.xbib.marc.dialects.mab.MabSubfieldControl;
import org.xbib.marc.label.RecordLabel;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@ -39,6 +43,21 @@ public class MarcField implements Comparable<MarcField> {
private static final String BLANK_STRING = " ";
/**
* Characters accepted as subfield IDs by the validity check in this class.
* The set holds every ASCII character from U+0020 (space) through U+007E (tilde),
* 95 characters in total; U+007F (DEL) and all control characters are excluded.
* The apostrophe and the backslash are written as Java escapes because they
* cannot be expressed as Unicode escapes inside a character literal.
*/
private static final Set<Character> ASCII_GRAPHICS = new HashSet<>(Arrays.asList(
// U+0020 - U+002F: space and punctuation
'\u0020', '\u0021', '\u0022', '\u0023', '\u0024', '\u0025', '\u0026', '\'',
'\u0028', '\u0029', '\u002A', '\u002B', '\u002C', '\u002D', '\u002E', '\u002F',
// U+0030 - U+0039: digits
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
// U+003A - U+0040: punctuation between digits and upper-case letters
'\u003A', '\u003B', '\u003C', '\u003D', '\u003E', '\u003F', '\u0040',
// U+0041 - U+005A: upper-case letters
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z',
// U+005B - U+0060: punctuation between upper-case and lower-case letters
'\u005B', '\\', '\u005D', '\u005E', '\u005F', '\u0060',
// U+0061 - U+007A: lower-case letters
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
'u', 'v', 'w', 'x', 'y', 'z',
// U+007B - U+007E: trailing punctuation
'\u007B', '\u007C', '\u007D', '\u007E'
));
private final String tag;
private final String indicator;
@ -51,12 +70,12 @@ public class MarcField implements Comparable<MarcField> {
private final String subfieldIds;
private final LinkedList<Subfield> subfields;
private final Deque<Subfield> subfields;
private final boolean iscontrol;
private MarcField(String tag, String indicator, int position, int length,
String value, LinkedList<Subfield> subfields, String subfieldIds,
String value, Deque<Subfield> subfields, String subfieldIds,
boolean iscontrol) {
this.tag = tag;
this.indicator = indicator;
@ -120,7 +139,7 @@ public class MarcField implements Comparable<MarcField> {
* Return the subfields associated with this MARC field.
* @return a list of MARC subfields
*/
public LinkedList<Subfield> getSubfields() {
public Deque<Subfield> getSubfields() {
return subfields;
}
@ -129,7 +148,7 @@ public class MarcField implements Comparable<MarcField> {
* @param subfieldId subfield ID
* @return list of subfields
*/
public LinkedList<Subfield> getSubfield(String subfieldId) {
public Deque<Subfield> getSubfield(String subfieldId) {
return subfields.stream()
.filter(subfield -> subfield.getId().equals(subfieldId))
.collect(Collectors.toCollection(LinkedList::new));
@ -144,8 +163,8 @@ public class MarcField implements Comparable<MarcField> {
}
public String getFirstSubfieldValue(String subfieldId) {
LinkedList<Subfield> list = getSubfield(subfieldId);
return list.isEmpty() ? null : list.getFirst().getValue();
Deque<Subfield> deque = getSubfield(subfieldId);
return deque.isEmpty() ? null : deque.getFirst().getValue();
}
/**
@ -157,8 +176,8 @@ public class MarcField implements Comparable<MarcField> {
}
public String getLastSubfieldValue(String subfieldId) {
LinkedList<Subfield> list = getSubfield(subfieldId);
return list.isEmpty() ? null : list.getLast().getValue();
Deque<Subfield> deque = getSubfield(subfieldId);
return deque.isEmpty() ? null : deque.getLast().getValue();
}
/**
@ -229,13 +248,7 @@ public class MarcField implements Comparable<MarcField> {
}
boolean b = true;
for (int i = 0; i < subfieldIds.length(); i++) {
b = subfieldIds.charAt(i) == ' '
|| (subfieldIds.charAt(i) >= '0' && subfieldIds.charAt(i) <= '9')
|| (subfieldIds.charAt(i) >= 'a' && subfieldIds.charAt(i) <= 'z')
|| (subfieldIds.charAt(i) >= 'A' && subfieldIds.charAt(i) <= 'Z') // can appear in german MARC
|| subfieldIds.charAt(i) == '$' // can appear in german MARC
|| subfieldIds.charAt(i) == '=' // can appear in german MARC
;
b = ASCII_GRAPHICS.contains(subfieldIds.charAt(i));
if (!b) {
break;
}

View file

@ -22,25 +22,31 @@ package org.xbib.marc;
public class MarcFieldAdapter implements MarcListener {
// Every MarcListener callback below is implemented with an empty body.
// Presumably meant as a convenience base class so that subclasses override
// only the callbacks they need - confirm against the callers.
@Override
public void beginCollection() {
// empty by design: nothing happens when a collection begins
}
@Override
public void beginRecord(String format, String type) {
// empty by design: format and type are ignored
}
@Override
public void leader(String label) {
// empty by design: the label is ignored
}
@Override
public void field(MarcField field) {
// empty by design: the field is ignored
}
@Override
public void endRecord() {
// empty by design: nothing happens when a record ends
}
@Override
public void endCollection() {
// empty by design: nothing happens when a collection ends
}
}

View file

@ -29,7 +29,6 @@ import org.xbib.marc.transformer.MarcTransformer;
import org.xbib.marc.transformer.field.MarcFieldTransformers;
import org.xbib.marc.transformer.value.MarcValueTransformers;
import java.io.Closeable;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.LinkedList;
@ -38,7 +37,7 @@ import java.util.List;
/**
* This chunk listener interprets the chunks from a stream and generates MARC events to a given MARC listener.
*/
public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Closeable {
public class MarcGenerator implements ChunkListener<byte[], BytesReference> {
private String format;
@ -180,7 +179,8 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
builder.indicator(data.substring(0, pos));
if (pos < data.length()) {
builder.value(this.data.substring(pos));
} }
}
}
found = true;
break;
} else if (directory.containsKey(position - offset)) {
@ -221,7 +221,7 @@ public class MarcGenerator implements ChunkListener<byte[], BytesReference>, Clo
* This method will emit the last record, if not emitted already.
* Useful if chunk streams have no closing record separator.
*/
public void close() throws IOException {
public void flush() {
if (position > 0) {
emitMarcRecord();
}

View file

@ -57,7 +57,7 @@ public class MarcWriter extends MarcContentHandler implements Flushable, Closeab
* @param charset the character set
* @throws IOException if writer can not be created
*/
public MarcWriter(OutputStream out, Charset charset) throws IOException {
public MarcWriter(OutputStream out, Charset charset) {
this(out, charset, DEFAULT_BUFFER_SIZE);
}
@ -68,7 +68,7 @@ public class MarcWriter extends MarcContentHandler implements Flushable, Closeab
* @param buffersize the buffer size writing to the underlying output stream
* @throws IOException if writer can not be created
*/
public MarcWriter(OutputStream out, Charset charset, int buffersize) throws IOException {
public MarcWriter(OutputStream out, Charset charset, int buffersize) {
this.out = new SeparatorOutputStream(out, buffersize);
this.charset = charset;
this.bytesStreamOutput = new BytesStreamOutput();

View file

@ -16,6 +16,9 @@ public class MabSubfieldControl {
FIELDS.put("856", 2);
}
private MabSubfieldControl() {
// empty by design: the private constructor prevents instantiation from outside this class
}
/**
* Look up the subfield ID length registered for a tag.
* @param tag the field tag used as the lookup key
* @return the value mapped to the tag in {@code FIELDS}, or 0 if the tag has no mapping
*/
public static Integer getSubfieldIdLen(String tag) {
return FIELDS.getOrDefault(tag, 0);
}

View file

@ -24,7 +24,15 @@ import org.xbib.marc.label.RecordLabel;
import org.xbib.marc.transformer.value.MarcValueTransformers;
import org.xbib.marc.xml.MarcContentHandler;
import java.io.*;
import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
@ -97,23 +105,23 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
*/
private boolean top;
public MarcJsonWriter(OutputStream out) throws IOException {
public MarcJsonWriter(OutputStream out) {
this(out, Style.ARRAY);
}
public MarcJsonWriter(OutputStream out, Style style) throws IOException {
public MarcJsonWriter(OutputStream out, Style style) {
this(out, DEFAULT_BUFFER_SIZE, style);
}
public MarcJsonWriter(OutputStream out, int bufferSize, Style style) throws IOException {
public MarcJsonWriter(OutputStream out, int bufferSize, Style style) {
this(new OutputStreamWriter(out, StandardCharsets.UTF_8), style, bufferSize);
}
public MarcJsonWriter(Writer writer) throws IOException {
public MarcJsonWriter(Writer writer) {
this(writer, Style.ARRAY, DEFAULT_BUFFER_SIZE);
}
public MarcJsonWriter(Writer writer, Style style, int bufferSize) throws IOException {
public MarcJsonWriter(Writer writer, Style style, int bufferSize) {
this.writer = new BufferedWriter(writer, bufferSize);
this.bufferSize = bufferSize;
this.style = style;

View file

@ -45,7 +45,7 @@ public class MarcTool {
private String stylesheet = null;
private String result = null;
public static void main(String[] args) throws Exception {
public static void main(String[] args) {
MarcTool marcTool = new MarcTool();
marcTool.parse(args);
System.exit(marcTool.run());
@ -99,8 +99,7 @@ public class MarcTool {
if (mode == null) {
mode = "marc2xml";
}
switch (mode) {
case "marc2xml": {
if ("marc2xml".equals(mode)) {
try (InputStream in = Files.newInputStream(Paths.get(input));
MarcXchangeWriter writer = new MarcXchangeWriter(Files.newBufferedWriter(Paths.get(output)), true)) {
Marc.Builder builder = Marc.builder()
@ -119,8 +118,7 @@ public class MarcTool {
return 1;
}
return 0;
}
default: {
} else {
String help = "Usage: " + getClass().getName()
+ " --mode [marc2xml] set operation mode\n"
+ " --input <path> \n"
@ -134,4 +132,3 @@ public class MarcTool {
}
}
}
}

View file

@ -21,6 +21,7 @@ import static org.xbib.marc.transformer.field.MarcFieldTransformer.Operator.HEAD
import org.xbib.marc.MarcField;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Level;
@ -131,12 +132,13 @@ public class MarcFieldTransformer extends LinkedHashMap<String, MarcField> {
builder.subfield(subfield.getId(), subfield.getValue());
}
} else {
// map subfields
for (int i = 0; i < marcField.getSubfields().size(); i++) {
if (i < newMarcField.getSubfields().size()) {
builder.subfield(newMarcField.getSubfields().get(i).getId(),
marcField.getSubfields().get(i).getValue());
}
// transform subfields
Iterator<MarcField.Subfield> subfields = marcField.getSubfields().iterator();
Iterator<MarcField.Subfield> newSubfields = newMarcField.getSubfields().iterator();
while (subfields.hasNext() && newSubfields.hasNext()) {
MarcField.Subfield subfield = subfields.next();
MarcField.Subfield newSubfield = newSubfields.next();
builder.subfield(newSubfield.getId(), subfield.getValue());
}
}
}
@ -192,11 +194,12 @@ public class MarcFieldTransformer extends LinkedHashMap<String, MarcField> {
} else {
// get the correct MARC field to map subfield IDs
MarcField marcField1 = get(key);
for (int i = 0; i < marcField.getSubfields().size(); i++) {
if (i < marcField1.getSubfields().size()) {
builder.subfield(marcField1.getSubfields().get(i).getId(),
marcField.getSubfields().get(i).getValue());
}
Iterator<MarcField.Subfield> subfields = marcField.getSubfields().iterator();
Iterator<MarcField.Subfield> newSubfields = marcField1.getSubfields().iterator();
while (subfields.hasNext() && newSubfields.hasNext()) {
MarcField.Subfield subfield = subfields.next();
MarcField.Subfield newSubfield = newSubfields.next();
builder.subfield(newSubfield.getId(), subfield.getValue());
}
}
lastBuilt = builder.build();

View file

@ -25,12 +25,14 @@ import java.util.Map;
*/
public class MarcValueTransformers {
// Map key under which the fallback value transformer is stored; it is looked up
// when no transformer is registered for a specific field key.
private static final String DEFAULT = "_default";
private final Map<String, MarcValueTransformer> marcValueTransformerMap = new HashMap<>();
private final Map<String, String> subfieldMap = new HashMap<>();
public MarcValueTransformers setMarcValueTransformer(MarcValueTransformer transformer) {
this.marcValueTransformerMap.put("_default", transformer);
this.marcValueTransformerMap.put(DEFAULT, transformer);
return this;
}
@ -62,7 +64,7 @@ public class MarcValueTransformers {
return field;
}
final MarcValueTransformer transformer = marcValueTransformerMap.containsKey(key) ?
marcValueTransformerMap.get(key) : marcValueTransformerMap.get("_default");
marcValueTransformerMap.get(key) : marcValueTransformerMap.get(DEFAULT);
if (transformer != null) {
MarcField.Builder builder = MarcField.builder();
builder.tag(field.getTag()).indicator(field.getIndicator());
@ -80,7 +82,7 @@ public class MarcValueTransformers {
}
public String transform(String value) {
MarcValueTransformer marcValueTransformer = marcValueTransformerMap.get("_default");
MarcValueTransformer marcValueTransformer = marcValueTransformerMap.get(DEFAULT);
return marcValueTransformer != null ? marcValueTransformer.transform(value) : value;
}
}

View file

@ -29,7 +29,7 @@ public class MarcFieldTest extends Assert {
@Test
public void testFieldData() {
MarcField marcField = MarcField.builder().tag("100").indicator("").value("Hello World").build();
assertEquals(marcField.getValue(), "Hello World");
assertEquals("Hello World", marcField.getValue());
}
@Test
@ -159,7 +159,7 @@ public class MarcFieldTest extends Assert {
MarcField marcField = MarcField.builder()
.tag("100")
.indicator("0")
.subfield("-", null)
.subfield("\u007f", null)
.build();
assertFalse(marcField.isSubfieldValid());
}

View file

@ -92,7 +92,7 @@ public class MarcRecordTest extends Assert {
@Test
public void testFilterKeyIterable() throws Exception {
String s = "summerland.mrc";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
Marc.Builder builder = Marc.builder()
.setInputStream(in)
.setCharset(Charset.forName("ANSEL"));
@ -101,13 +101,13 @@ public class MarcRecordTest extends Assert {
// single 245 field
assertEquals(1, marcRecord.filterKey(Pattern.compile("^245.*")).size());
}
in.close();
}
}
@Test
public void testFilterKey() throws Exception {
String s = "summerland.mrc";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
Marc.Builder builder = Marc.builder()
.setInputStream(in)
.setCharset(Charset.forName("ANSEL"))
@ -116,26 +116,26 @@ public class MarcRecordTest extends Assert {
for (MarcRecord marcRecord : builder.iterable()) {
assertEquals(1, marcRecord.getFields().size());
}
in.close();
}
}
@Test
public void testFilterValueIterable() throws Exception {
String s = "summerland.mrc";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
Marc.Builder builder = Marc.builder()
.setInputStream(in)
.setCharset(Charset.forName("ANSEL"));
for (MarcRecord marcRecord : builder.iterable()) {
assertEquals(2, marcRecord.filterValue(Pattern.compile(".*?Chabon.*")).size());
}
in.close();
}
}
@Test
public void testFilterValue() throws Exception {
String s = "summerland.mrc";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
Marc.Builder builder = Marc.builder()
.setInputStream(in)
.setCharset(Charset.forName("ANSEL"))
@ -143,13 +143,13 @@ public class MarcRecordTest extends Assert {
for (MarcRecord marcRecord : builder.iterable()) {
assertEquals(2, marcRecord.getFields().size());
}
in.close();
}
}
@Test
public void testSequentialIteration() throws Exception {
String s = "dialects/unimarc/periouni.mrc";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
Marc.Builder builder = Marc.builder()
.setInputStream(in).setCharset(StandardCharsets.UTF_8);
final AtomicInteger count = new AtomicInteger();
@ -157,21 +157,21 @@ public class MarcRecordTest extends Assert {
for (MarcRecord marcRecord : builder.iterable()) {
count.incrementAndGet();
}
in.close();
assertEquals(3064, count.get());
}
}
@Test
public void testRecordStream() throws Exception {
String s = "dialects/unimarc/periouni.mrc";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
Marc.Builder builder = Marc.builder()
.setInputStream(in)
.setCharset(StandardCharsets.UTF_8);
long count = builder.recordStream().map(r -> r.get("001")).count();
in.close();
assertEquals(3064, count);
}
}
/**
* Test MarcXchangeWriter as record listener. Result must be the same as with field listener.
@ -179,7 +179,7 @@ public class MarcRecordTest extends Assert {
@Test
public void testIRMARC8AsRecordStream() throws Exception {
String s = "IRMARC8.bin";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
File file = File.createTempFile(s + ".", ".xml");
file.deleteOnExit();
FileOutputStream out = new FileOutputStream(file);
@ -197,11 +197,12 @@ public class MarcRecordTest extends Assert {
}
assertThat(file, CompareMatcher.isIdenticalTo(getClass().getResource(s + ".xml").openStream()));
}
}
@Test
public void testIRMARC8AsLightweightRecordAdapter() throws Exception {
String s = "IRMARC8.bin";
InputStream in = getClass().getResource(s).openStream();
try (InputStream in = getClass().getResource(s).openStream()) {
File file = File.createTempFile(s + ".", ".xml");
file.deleteOnExit();
FileOutputStream out = new FileOutputStream(file);
@ -221,5 +222,6 @@ public class MarcRecordTest extends Assert {
}
assertThat(file, CompareMatcher.isIdenticalTo(getClass().getResource(s + ".xml").openStream()));
}
}
}

View file

@ -1,5 +1,8 @@
package org.xbib.marc.dialects.mab;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
import org.xbib.marc.Marc;
import org.xbib.marc.MarcRecord;
@ -9,9 +12,6 @@ import java.nio.charset.Charset;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
*
*/

View file

@ -89,11 +89,11 @@ public class BufferedSeparatorInputStreamTest {
listener.chunk(chunk);
}
in.close();
assertEquals(unitCount, 23);
assertEquals(groupCount, 9);
assertEquals(dataCount, 389);
assertEquals(recordCount, 356);
assertEquals(fileCount, 1);
assertEquals(23, unitCount);
assertEquals(9, groupCount);
assertEquals(389, dataCount);
assertEquals(356, recordCount);
assertEquals(1, fileCount);
}
@Test