add iterable XML MARCRecord
This commit is contained in:
parent
77bd0ac593
commit
b5d4913acf
5 changed files with 187 additions and 69 deletions
|
@ -1,5 +1,5 @@
|
|||
group = org.xbib
|
||||
name = marc
|
||||
version = 2.9.14
|
||||
version = 2.9.15
|
||||
|
||||
org.gradle.warning.mode = ALL
|
||||
|
|
|
@ -125,12 +125,12 @@ public final class Marc {
|
|||
}
|
||||
|
||||
/**
|
||||
* Run XML stream parser over an XML input stream, with an XML event consumer.
|
||||
* Run XML stream parser over an XML input stream with an XML event consumer.
|
||||
* @param xmlInputFactory the XML input factory
|
||||
* @param consumer the XML event consumer
|
||||
* @throws XMLStreamException if parsing fails
|
||||
*/
|
||||
public void parseEvents(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException {
|
||||
public void parse(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException {
|
||||
Objects.requireNonNull(consumer);
|
||||
if (builder.getMarcListeners() != null) {
|
||||
for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) {
|
||||
|
@ -144,6 +144,19 @@ public final class Marc {
|
|||
xmlEventReader.close();
|
||||
}
|
||||
|
||||
public void parseNextRecord(XMLEventReader xmlEventReader, MarcXchangeEventConsumer consumer) throws XMLStreamException {
|
||||
Objects.requireNonNull(consumer);
|
||||
if (builder.getMarcListeners() != null) {
|
||||
for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) {
|
||||
consumer.setMarcListener(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
while (xmlEventReader.hasNext() && !consumer.isEndRecordReached()) {
|
||||
consumer.add(xmlEventReader.nextEvent());
|
||||
}
|
||||
consumer.resetEndRecordReached();
|
||||
}
|
||||
|
||||
public BufferedSeparatorInputStream iso2709Stream() {
|
||||
return iso2709Stream(DEFAULT_BUFFER_SIZE);
|
||||
}
|
||||
|
@ -575,10 +588,6 @@ public final class Marc {
|
|||
builder.getMarcListener().endCollection();
|
||||
}
|
||||
}
|
||||
|
||||
public void parseRecords() throws IOException {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1221,6 +1230,75 @@ public final class Marc {
|
|||
return this;
|
||||
}
|
||||
|
||||
public Iterator<MarcRecord> xmlRecordIterator() {
|
||||
return xmlRecordIterator(new MarcXchangeEventConsumer());
|
||||
}
|
||||
|
||||
public Iterator<MarcRecord> xmlRecordIterator(MarcXchangeEventConsumer consumer) {
|
||||
XMLEventReader xmlEventReader;
|
||||
try {
|
||||
xmlEventReader = XMLInputFactory.newFactory().createXMLEventReader(inputStream);
|
||||
} catch (XMLStreamException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
final MarcRecordAdapter marcRecordAdapter = new MarcRecordAdapter(new MarcRecordListener() {
|
||||
@Override
|
||||
public void beginCollection() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void record(MarcRecord record) {
|
||||
marcRecord = record;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endCollection() {
|
||||
}
|
||||
}, Comparator.naturalOrder());
|
||||
consumer.setMarcListener(marcRecordAdapter);
|
||||
return new Iterator<>() {
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
try {
|
||||
MarcRecord record;
|
||||
record(null);
|
||||
while (xmlEventReader.hasNext() && !consumer.isEndRecordReached()) {
|
||||
consumer.add(xmlEventReader.nextEvent());
|
||||
}
|
||||
consumer.resetEndRecordReached();
|
||||
record = getMarcRecord();
|
||||
if (record != null) {
|
||||
return true;
|
||||
}
|
||||
} catch (XMLStreamException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MarcRecord next() {
|
||||
MarcRecord record = getMarcRecord();
|
||||
if (record == null) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
return record;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* For easy {@code for} statements.
|
||||
* @return iterable
|
||||
*/
|
||||
public Iterable<MarcRecord> xmlIterable() {
|
||||
return this::xmlRecordIterator;
|
||||
}
|
||||
|
||||
public Stream<MarcRecord> xmlRecordStream() {
|
||||
return StreamSupport.stream(xmlIterable().spliterator(), false);
|
||||
}
|
||||
|
||||
private MarcRecord getMarcRecord() {
|
||||
return marcRecord;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.xbib.marc;
|
|||
import org.xbib.marc.label.RecordLabel;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* The Marc record adapter collects Marc field events, collects them in a Marc builder,
|
||||
|
@ -33,6 +34,7 @@ public class MarcRecordAdapter implements MarcListener {
|
|||
private final Comparator<String> comparator;
|
||||
|
||||
public MarcRecordAdapter(MarcRecordListener marcRecordListener, Comparator<String> comparator) {
|
||||
Objects.requireNonNull(marcRecordListener);
|
||||
this.marcRecordListener = marcRecordListener;
|
||||
this.builder = Marc.builder();
|
||||
this.comparator = comparator;
|
||||
|
|
|
@ -45,13 +45,13 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
|
|||
|
||||
private final Deque<MarcField.Builder> stack;
|
||||
|
||||
private final Map<String, MarcListener> listeners;
|
||||
private final Map<String, MarcListener> marcListeners;
|
||||
|
||||
private final StringBuilder content;
|
||||
|
||||
private MarcValueTransformers marcValueTransformers;
|
||||
|
||||
private MarcListener listener;
|
||||
|
||||
private final StringBuilder content;
|
||||
private MarcListener marcListener;
|
||||
|
||||
private String format;
|
||||
|
||||
|
@ -59,23 +59,26 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
|
|||
|
||||
private final Set<String> validNamespaces;
|
||||
|
||||
private boolean endRecordReached;
|
||||
|
||||
public MarcXchangeEventConsumer() {
|
||||
this.stack = new LinkedList<>();
|
||||
this.listeners = new HashMap<>();
|
||||
this.marcListeners = new HashMap<>();
|
||||
this.content = new StringBuilder();
|
||||
this.format = MARC21_FORMAT;
|
||||
this.type = BIBLIOGRAPHIC_TYPE;
|
||||
this.validNamespaces = new HashSet<>();
|
||||
this.validNamespaces.addAll(Set.of(MARCXCHANGE_V1_NS_URI, MARCXCHANGE_V2_NS_URI, MARC21_SCHEMA_URI));
|
||||
this.endRecordReached = false;
|
||||
}
|
||||
|
||||
public MarcXchangeEventConsumer setMarcListener(String type, MarcListener listener) {
|
||||
this.listeners.put(type, listener);
|
||||
this.marcListeners.put(type, listener);
|
||||
return this;
|
||||
}
|
||||
|
||||
public MarcXchangeEventConsumer setMarcListener(MarcListener listener) {
|
||||
this.listeners.put(BIBLIOGRAPHIC_TYPE, listener);
|
||||
this.marcListeners.put(BIBLIOGRAPHIC_TYPE, listener);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -91,37 +94,38 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
|
|||
|
||||
@Override
|
||||
public void beginCollection() {
|
||||
if (listener != null) {
|
||||
listener.beginCollection();
|
||||
if (marcListener != null) {
|
||||
marcListener.beginCollection();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endCollection() {
|
||||
if (listener != null) {
|
||||
listener.endCollection();
|
||||
if (marcListener != null) {
|
||||
marcListener.endCollection();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void beginRecord(String format, String type) {
|
||||
this.listener = listeners.get(type != null ? type : BIBLIOGRAPHIC_TYPE);
|
||||
if (listener != null) {
|
||||
listener.beginRecord(format, type);
|
||||
this.marcListener = marcListeners.get(type != null ? type : BIBLIOGRAPHIC_TYPE);
|
||||
if (marcListener != null) {
|
||||
marcListener.beginRecord(format, type);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endRecord() {
|
||||
if (listener != null) {
|
||||
listener.endRecord();
|
||||
if (marcListener != null) {
|
||||
marcListener.endRecord();
|
||||
}
|
||||
this.endRecordReached = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void leader(RecordLabel label) {
|
||||
if (listener != null) {
|
||||
listener.leader(label);
|
||||
if (marcListener != null) {
|
||||
marcListener.leader(label);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -131,8 +135,8 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
|
|||
if (marcValueTransformers != null) {
|
||||
field = marcValueTransformers.transformValue(field);
|
||||
}
|
||||
if (listener != null) {
|
||||
listener.field(field);
|
||||
if (marcListener != null) {
|
||||
marcListener.field(field);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,7 +145,7 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
|
|||
if (event.isStartElement()) {
|
||||
StartElement element = (StartElement) event;
|
||||
String uri = element.getName().getNamespaceURI();
|
||||
if (!isNamespace(uri)) {
|
||||
if (!validNamespaces.contains(uri)) {
|
||||
return;
|
||||
}
|
||||
String localName = element.getName().getLocalPart();
|
||||
|
@ -191,69 +195,57 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
|
|||
}
|
||||
content.setLength(0);
|
||||
switch (localName) {
|
||||
case COLLECTION: {
|
||||
case COLLECTION -> {
|
||||
beginCollection();
|
||||
break;
|
||||
}
|
||||
case RECORD: {
|
||||
case RECORD -> {
|
||||
setFormat(thisformat);
|
||||
setType(thistype);
|
||||
beginRecord(thisformat, thistype);
|
||||
break;
|
||||
}
|
||||
case LEADER: {
|
||||
break;
|
||||
case LEADER -> {
|
||||
}
|
||||
case CONTROLFIELD:
|
||||
case DATAFIELD: {
|
||||
case CONTROLFIELD, DATAFIELD -> {
|
||||
MarcField.Builder builder = MarcField.builder().tag(tag);
|
||||
if (max > 0) {
|
||||
builder.indicator(sb.substring(min - 1, max));
|
||||
}
|
||||
stack.push(builder);
|
||||
break;
|
||||
}
|
||||
case SUBFIELD: {
|
||||
case SUBFIELD -> {
|
||||
stack.peek().subfield(code, null);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
default -> {
|
||||
}
|
||||
}
|
||||
} else if (event.isEndElement()) {
|
||||
EndElement element = (EndElement) event;
|
||||
String uri = element.getName().getNamespaceURI();
|
||||
if (!isNamespace(uri)) {
|
||||
if (!validNamespaces.contains(uri)) {
|
||||
return;
|
||||
}
|
||||
String localName = element.getName().getLocalPart();
|
||||
switch (localName) {
|
||||
case COLLECTION: {
|
||||
case COLLECTION -> {
|
||||
endCollection();
|
||||
break;
|
||||
}
|
||||
case RECORD: {
|
||||
case RECORD -> {
|
||||
endRecord();
|
||||
break;
|
||||
}
|
||||
case LEADER: {
|
||||
case LEADER -> {
|
||||
leader(RecordLabel.builder().from(content.toString().toCharArray()).build());
|
||||
break;
|
||||
}
|
||||
case CONTROLFIELD: {
|
||||
case CONTROLFIELD -> {
|
||||
field(transformValue(stack.pop().value(content.toString()).build()));
|
||||
break;
|
||||
}
|
||||
case DATAFIELD: {
|
||||
case DATAFIELD -> {
|
||||
field(transformValue(stack.pop().build()));
|
||||
break;
|
||||
}
|
||||
case SUBFIELD: {
|
||||
case SUBFIELD -> {
|
||||
stack.peek().subfieldValue(content.toString());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
default -> {
|
||||
}
|
||||
}
|
||||
content.setLength(0);
|
||||
} else if (event.isCharacters()) {
|
||||
|
@ -285,8 +277,12 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
|
|||
return this;
|
||||
}
|
||||
|
||||
private boolean isNamespace(String uri) {
|
||||
return validNamespaces.contains(uri);
|
||||
public boolean isEndRecordReached() {
|
||||
return endRecordReached;
|
||||
}
|
||||
|
||||
public void resetEndRecordReached() {
|
||||
endRecordReached = false;
|
||||
}
|
||||
|
||||
private MarcField transformValue(MarcField field) {
|
||||
|
|
|
@ -16,13 +16,19 @@
|
|||
package org.xbib.marc.xml;
|
||||
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.xbib.marc.Marc;
|
||||
import org.xbib.marc.MarcRecord;
|
||||
import org.xmlunit.matchers.CompareMatcher;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
|
||||
/**
|
||||
|
@ -30,6 +36,8 @@ import javax.xml.stream.XMLInputFactory;
|
|||
*/
|
||||
public class MarcEventConsumerTest {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(MarcEventConsumerTest.class.getName());
|
||||
|
||||
/**
|
||||
* Parsing XML by STAX (streaming XML) from Aleph publishing interface (hbz dialect).
|
||||
*
|
||||
|
@ -58,7 +66,7 @@ public class MarcEventConsumerTest {
|
|||
.setFormat("AlephXML")
|
||||
.setType("Bibliographic")
|
||||
.build()
|
||||
.parseEvents(XMLInputFactory.newFactory(), consumer);
|
||||
.parse(XMLInputFactory.newFactory(), consumer);
|
||||
writer.endCollection();
|
||||
writer.endDocument();
|
||||
sw.close();
|
||||
|
@ -72,17 +80,51 @@ public class MarcEventConsumerTest {
|
|||
InputStream in = getClass().getResourceAsStream(s);
|
||||
MarcXchangeEventConsumer consumer = new MarcXchangeEventConsumer();
|
||||
consumer.addNamespace("http://www.ddb.de/professionell/mabxml/mabxml-1.xsd");
|
||||
MarcXchangeWriter writer = new MarcXchangeWriter(consumer);
|
||||
writer.setFormat("AlephXML").setType("Bibliographic");
|
||||
writer.startDocument();
|
||||
Marc.builder()
|
||||
try (MarcXchangeWriter writer = new MarcXchangeWriter(consumer)) {
|
||||
writer.setFormat("AlephXML").setType("Bibliographic");
|
||||
writer.startDocument();
|
||||
Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.setFormat("AlephXML")
|
||||
.setType("Bibliographic")
|
||||
.build()
|
||||
.writeCollection();
|
||||
writer.endDocument();
|
||||
assertNull(writer.getException());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXmlIterable() {
|
||||
String s = "chabon.mrc.xml";
|
||||
InputStream in = getClass().getResourceAsStream(s);
|
||||
AtomicInteger count = new AtomicInteger();
|
||||
for (MarcRecord marcRecord : Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.setFormat("AlephXML")
|
||||
.setType("Bibliographic")
|
||||
.build()
|
||||
.writeCollection();
|
||||
writer.endDocument();
|
||||
assertNull(writer.getException());
|
||||
.xmlIterable()) {
|
||||
logger.log(Level.INFO, marcRecord.toString());
|
||||
count.incrementAndGet();
|
||||
}
|
||||
assertEquals(2, count.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXmlIterator() {
|
||||
String s = "HT016424175.xml";
|
||||
InputStream in = getClass().getResourceAsStream(s);
|
||||
MarcXchangeEventConsumer consumer = new MarcXchangeEventConsumer();
|
||||
consumer.addNamespace("http://www.ddb.de/professionell/mabxml/mabxml-1.xsd");
|
||||
Iterator<MarcRecord> iterator = Marc.builder()
|
||||
.setInputStream(in)
|
||||
.setCharset(StandardCharsets.UTF_8)
|
||||
.xmlRecordIterator(consumer);
|
||||
AtomicInteger count = new AtomicInteger();
|
||||
while (iterator.hasNext()) {
|
||||
logger.log(Level.INFO, iterator.next().toString());
|
||||
count.incrementAndGet();
|
||||
}
|
||||
assertEquals(1, count.get());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue