add iterable XML MARCRecord

This commit is contained in:
Jörg Prante 2023-01-23 14:26:47 +01:00
parent 77bd0ac593
commit b5d4913acf
5 changed files with 187 additions and 69 deletions

View file

@ -1,5 +1,5 @@
group = org.xbib
name = marc
version = 2.9.14
version = 2.9.15
org.gradle.warning.mode = ALL

View file

@ -125,12 +125,12 @@ public final class Marc {
}
/**
* Run XML stream parser over an XML input stream, with an XML event consumer.
* Run XML stream parser over an XML input stream with an XML event consumer.
* @param xmlInputFactory the XML input factory
* @param consumer the XML event consumer
* @throws XMLStreamException if parsing fails
*/
public void parseEvents(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException {
public void parse(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException {
Objects.requireNonNull(consumer);
if (builder.getMarcListeners() != null) {
for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) {
@ -144,6 +144,19 @@ public final class Marc {
xmlEventReader.close();
}
/**
 * Pull XML events from the given reader and hand them to the consumer until
 * the consumer reports that the end of a record was reached, or until the
 * reader has no more events. Every MARC listener known to the builder is
 * registered with the consumer under its key before reading starts, and the
 * consumer's end-of-record flag is cleared before this method returns.
 *
 * @param xmlEventReader the XML event reader to pull events from
 * @param consumer the XML event consumer, must not be {@code null}
 * @throws XMLStreamException if reading or consuming an event fails
 */
public void parseNextRecord(XMLEventReader xmlEventReader, MarcXchangeEventConsumer consumer) throws XMLStreamException {
    Objects.requireNonNull(consumer);
    Map<String, MarcListener> registeredListeners = builder.getMarcListeners();
    if (registeredListeners != null) {
        registeredListeners.forEach(consumer::setMarcListener);
    }
    for (;;) {
        // same evaluation order as before: ask the reader first, then the consumer
        if (!xmlEventReader.hasNext() || consumer.isEndRecordReached()) {
            break;
        }
        consumer.add(xmlEventReader.nextEvent());
    }
    consumer.resetEndRecordReached();
}
/**
 * Convenience overload that delegates to the one-argument
 * {@code iso2709Stream} variant, passing {@code DEFAULT_BUFFER_SIZE}.
 *
 * @return whatever the one-argument variant returns
 */
public BufferedSeparatorInputStream iso2709Stream() {
    final BufferedSeparatorInputStream stream = iso2709Stream(DEFAULT_BUFFER_SIZE);
    return stream;
}
@ -575,10 +588,6 @@ public final class Marc {
builder.getMarcListener().endCollection();
}
}
public void parseRecords() throws IOException {
}
}
/**
@ -1221,6 +1230,75 @@ public final class Marc {
return this;
}
/**
 * Create an iterator over the MARC records of the XML input, using a
 * freshly constructed {@link MarcXchangeEventConsumer}.
 *
 * @return the record iterator
 */
public Iterator<MarcRecord> xmlRecordIterator() {
    final MarcXchangeEventConsumer defaultConsumer = new MarcXchangeEventConsumer();
    return xmlRecordIterator(defaultConsumer);
}
/**
 * Create an iterator over the MARC records of the XML input.
 * <p>
 * An XML event reader is opened on the configured input stream, and the
 * given consumer is wired to a {@link MarcRecordAdapter} that stores every
 * completed record in this builder. The returned iterator then pulls XML
 * events record by record.
 * <p>
 * The iterator follows the {@link Iterator} contract: {@code hasNext()} may
 * be called any number of times without skipping records, and every call to
 * {@code next()} returns a new record. Records that end without delivering
 * a {@link MarcRecord} are skipped instead of ending the iteration.
 *
 * @param consumer the XML event consumer that receives the XML events, must not be {@code null}
 * @return the record iterator
 * @throws IllegalStateException if the XML event reader can not be created,
 *         or (from the iterator) if reading an XML event fails
 */
public Iterator<MarcRecord> xmlRecordIterator(MarcXchangeEventConsumer consumer) {
    Objects.requireNonNull(consumer, "consumer");
    XMLEventReader xmlEventReader;
    try {
        xmlEventReader = XMLInputFactory.newFactory().createXMLEventReader(inputStream);
    } catch (XMLStreamException e) {
        throw new IllegalStateException(e);
    }
    final MarcRecordAdapter marcRecordAdapter = new MarcRecordAdapter(new MarcRecordListener() {
        @Override
        public void beginCollection() {
        }

        @Override
        public void record(MarcRecord record) {
            // remember the completed record so the iterator can pick it up
            marcRecord = record;
        }

        @Override
        public void endCollection() {
        }
    }, Comparator.naturalOrder());
    consumer.setMarcListener(marcRecordAdapter);
    return new Iterator<>() {

        // record that was parsed ahead by hasNext() and not yet handed out by next()
        private MarcRecord lookahead;

        @Override
        public boolean hasNext() {
            if (lookahead != null) {
                // already parsed ahead, do not touch the reader again
                return true;
            }
            try {
                // NOTE(review): record(...) is defined outside this block; it is
                // expected to clear the previously captured record - confirm.
                record(null);
                while (lookahead == null && xmlEventReader.hasNext()) {
                    while (xmlEventReader.hasNext() && !consumer.isEndRecordReached()) {
                        consumer.add(xmlEventReader.nextEvent());
                    }
                    consumer.resetEndRecordReached();
                    lookahead = getMarcRecord();
                }
            } catch (XMLStreamException e) {
                throw new IllegalStateException(e);
            }
            return lookahead != null;
        }

        @Override
        public MarcRecord next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            MarcRecord result = lookahead;
            lookahead = null;
            return result;
        }
    };
}
/**
 * Expose the XML record iterator as an {@link Iterable}, so that records
 * can be consumed in an enhanced {@code for} statement. Every request for
 * an iterator is answered by calling {@code xmlRecordIterator()}.
 *
 * @return iterable over the MARC records
 */
public Iterable<MarcRecord> xmlIterable() {
    return () -> xmlRecordIterator();
}
/**
 * Expose the MARC records of the XML input as a sequential {@link Stream},
 * backed by the spliterator of {@code xmlIterable()}.
 *
 * @return sequential stream of MARC records
 */
public Stream<MarcRecord> xmlRecordStream() {
    final Iterable<MarcRecord> records = xmlIterable();
    return StreamSupport.stream(records.spliterator(), false);
}
/**
 * Read the currently stored MARC record.
 *
 * @return the value of the {@code marcRecord} field, possibly {@code null}
 */
private MarcRecord getMarcRecord() {
    final MarcRecord current = marcRecord;
    return current;
}

View file

@ -18,6 +18,7 @@ package org.xbib.marc;
import org.xbib.marc.label.RecordLabel;
import java.util.Comparator;
import java.util.Objects;
/**
* The Marc record adapter collects Marc field events, collects them in a Marc builder,
@ -33,6 +34,7 @@ public class MarcRecordAdapter implements MarcListener {
private final Comparator<String> comparator;
public MarcRecordAdapter(MarcRecordListener marcRecordListener, Comparator<String> comparator) {
Objects.requireNonNull(marcRecordListener);
this.marcRecordListener = marcRecordListener;
this.builder = Marc.builder();
this.comparator = comparator;

View file

@ -45,13 +45,13 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
private final Deque<MarcField.Builder> stack;
private final Map<String, MarcListener> listeners;
private final Map<String, MarcListener> marcListeners;
private final StringBuilder content;
private MarcValueTransformers marcValueTransformers;
private MarcListener listener;
private final StringBuilder content;
private MarcListener marcListener;
private String format;
@ -59,23 +59,26 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
private final Set<String> validNamespaces;
private boolean endRecordReached;
public MarcXchangeEventConsumer() {
this.stack = new LinkedList<>();
this.listeners = new HashMap<>();
this.marcListeners = new HashMap<>();
this.content = new StringBuilder();
this.format = MARC21_FORMAT;
this.type = BIBLIOGRAPHIC_TYPE;
this.validNamespaces = new HashSet<>();
this.validNamespaces.addAll(Set.of(MARCXCHANGE_V1_NS_URI, MARCXCHANGE_V2_NS_URI, MARC21_SCHEMA_URI));
this.endRecordReached = false;
}
public MarcXchangeEventConsumer setMarcListener(String type, MarcListener listener) {
this.listeners.put(type, listener);
this.marcListeners.put(type, listener);
return this;
}
public MarcXchangeEventConsumer setMarcListener(MarcListener listener) {
this.listeners.put(BIBLIOGRAPHIC_TYPE, listener);
this.marcListeners.put(BIBLIOGRAPHIC_TYPE, listener);
return this;
}
@ -91,37 +94,38 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
@Override
public void beginCollection() {
if (listener != null) {
listener.beginCollection();
if (marcListener != null) {
marcListener.beginCollection();
}
}
@Override
public void endCollection() {
if (listener != null) {
listener.endCollection();
if (marcListener != null) {
marcListener.endCollection();
}
}
@Override
public void beginRecord(String format, String type) {
this.listener = listeners.get(type != null ? type : BIBLIOGRAPHIC_TYPE);
if (listener != null) {
listener.beginRecord(format, type);
this.marcListener = marcListeners.get(type != null ? type : BIBLIOGRAPHIC_TYPE);
if (marcListener != null) {
marcListener.beginRecord(format, type);
}
}
@Override
public void endRecord() {
if (listener != null) {
listener.endRecord();
if (marcListener != null) {
marcListener.endRecord();
}
this.endRecordReached = true;
}
@Override
public void leader(RecordLabel label) {
if (listener != null) {
listener.leader(label);
if (marcListener != null) {
marcListener.leader(label);
}
}
@ -131,8 +135,8 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
if (marcValueTransformers != null) {
field = marcValueTransformers.transformValue(field);
}
if (listener != null) {
listener.field(field);
if (marcListener != null) {
marcListener.field(field);
}
}
@ -141,7 +145,7 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
if (event.isStartElement()) {
StartElement element = (StartElement) event;
String uri = element.getName().getNamespaceURI();
if (!isNamespace(uri)) {
if (!validNamespaces.contains(uri)) {
return;
}
String localName = element.getName().getLocalPart();
@ -191,69 +195,57 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
}
content.setLength(0);
switch (localName) {
case COLLECTION: {
case COLLECTION -> {
beginCollection();
break;
}
case RECORD: {
case RECORD -> {
setFormat(thisformat);
setType(thistype);
beginRecord(thisformat, thistype);
break;
}
case LEADER: {
break;
case LEADER -> {
}
case CONTROLFIELD:
case DATAFIELD: {
case CONTROLFIELD, DATAFIELD -> {
MarcField.Builder builder = MarcField.builder().tag(tag);
if (max > 0) {
builder.indicator(sb.substring(min - 1, max));
}
stack.push(builder);
break;
}
case SUBFIELD: {
case SUBFIELD -> {
stack.peek().subfield(code, null);
break;
}
default:
break;
default -> {
}
}
} else if (event.isEndElement()) {
EndElement element = (EndElement) event;
String uri = element.getName().getNamespaceURI();
if (!isNamespace(uri)) {
if (!validNamespaces.contains(uri)) {
return;
}
String localName = element.getName().getLocalPart();
switch (localName) {
case COLLECTION: {
case COLLECTION -> {
endCollection();
break;
}
case RECORD: {
case RECORD -> {
endRecord();
break;
}
case LEADER: {
case LEADER -> {
leader(RecordLabel.builder().from(content.toString().toCharArray()).build());
break;
}
case CONTROLFIELD: {
case CONTROLFIELD -> {
field(transformValue(stack.pop().value(content.toString()).build()));
break;
}
case DATAFIELD: {
case DATAFIELD -> {
field(transformValue(stack.pop().build()));
break;
}
case SUBFIELD: {
case SUBFIELD -> {
stack.peek().subfieldValue(content.toString());
break;
}
default:
break;
default -> {
}
}
content.setLength(0);
} else if (event.isCharacters()) {
@ -285,8 +277,12 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo
return this;
}
private boolean isNamespace(String uri) {
return validNamespaces.contains(uri);
/**
 * Tell whether {@code endRecord()} has set the end-of-record flag since
 * the flag was last cleared.
 *
 * @return {@code true} if the end-of-record flag is set
 */
public boolean isEndRecordReached() {
    return this.endRecordReached;
}
/**
 * Clear the end-of-record flag.
 */
public void resetEndRecordReached() {
    this.endRecordReached = false;
}
private MarcField transformValue(MarcField field) {

View file

@ -16,13 +16,19 @@
package org.xbib.marc.xml;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import org.junit.jupiter.api.Test;
import org.xbib.marc.Marc;
import org.xbib.marc.MarcRecord;
import org.xmlunit.matchers.CompareMatcher;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.stream.XMLInputFactory;
/**
@ -30,6 +36,8 @@ import javax.xml.stream.XMLInputFactory;
*/
public class MarcEventConsumerTest {
private static final Logger logger = Logger.getLogger(MarcEventConsumerTest.class.getName());
/**
* Parsing XML by STAX (streaming XML) from Aleph publishing interface (hbz dialect).
*
@ -58,7 +66,7 @@ public class MarcEventConsumerTest {
.setFormat("AlephXML")
.setType("Bibliographic")
.build()
.parseEvents(XMLInputFactory.newFactory(), consumer);
.parse(XMLInputFactory.newFactory(), consumer);
writer.endCollection();
writer.endDocument();
sw.close();
@ -72,7 +80,7 @@ public class MarcEventConsumerTest {
InputStream in = getClass().getResourceAsStream(s);
MarcXchangeEventConsumer consumer = new MarcXchangeEventConsumer();
consumer.addNamespace("http://www.ddb.de/professionell/mabxml/mabxml-1.xsd");
MarcXchangeWriter writer = new MarcXchangeWriter(consumer);
try (MarcXchangeWriter writer = new MarcXchangeWriter(consumer)) {
writer.setFormat("AlephXML").setType("Bibliographic");
writer.startDocument();
Marc.builder()
@ -85,4 +93,38 @@ public class MarcEventConsumerTest {
writer.endDocument();
assertNull(writer.getException());
}
}
/**
 * Iterate over the records of a MARC XML resource with an enhanced
 * {@code for} statement and check the number of records found.
 *
 * @throws Exception if closing the resource stream fails
 */
@Test
public void testXmlIterable() throws Exception {
    String s = "chabon.mrc.xml";
    int count = 0;
    // close the resource stream even when iteration or an assertion fails
    try (InputStream in = getClass().getResourceAsStream(s)) {
        for (MarcRecord marcRecord : Marc.builder()
                .setInputStream(in)
                .setCharset(StandardCharsets.UTF_8)
                .xmlIterable()) {
            logger.log(Level.INFO, marcRecord.toString());
            count++;
        }
    }
    assertEquals(2, count);
}
/**
 * Iterate over the records of an XML resource with an explicit iterator
 * and a consumer that accepts an additional namespace, and check the
 * number of records found.
 *
 * @throws Exception if closing the resource stream fails
 */
@Test
public void testXmlIterator() throws Exception {
    String s = "HT016424175.xml";
    MarcXchangeEventConsumer consumer = new MarcXchangeEventConsumer();
    consumer.addNamespace("http://www.ddb.de/professionell/mabxml/mabxml-1.xsd");
    int count = 0;
    // close the resource stream even when iteration or an assertion fails
    try (InputStream in = getClass().getResourceAsStream(s)) {
        Iterator<MarcRecord> iterator = Marc.builder()
                .setInputStream(in)
                .setCharset(StandardCharsets.UTF_8)
                .xmlRecordIterator(consumer);
        while (iterator.hasNext()) {
            logger.log(Level.INFO, iterator.next().toString());
            count++;
        }
    }
    assertEquals(1, count);
}
}