From b5d4913acf0e32998f56a948492f7d67b831c57c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Prante?= Date: Mon, 23 Jan 2023 14:26:47 +0100 Subject: [PATCH] add iterable XML MARCRecord --- gradle.properties | 2 +- src/main/java/org/xbib/marc/Marc.java | 90 +++++++++++++++-- .../java/org/xbib/marc/MarcRecordAdapter.java | 2 + .../marc/xml/MarcXchangeEventConsumer.java | 98 +++++++++---------- .../xbib/marc/xml/MarcEventConsumerTest.java | 64 +++++++++--- 5 files changed, 187 insertions(+), 69 deletions(-) diff --git a/gradle.properties b/gradle.properties index 1511a73..38ea9c4 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ group = org.xbib name = marc -version = 2.9.14 +version = 2.9.15 org.gradle.warning.mode = ALL diff --git a/src/main/java/org/xbib/marc/Marc.java b/src/main/java/org/xbib/marc/Marc.java index 7b69eda..bede9b0 100644 --- a/src/main/java/org/xbib/marc/Marc.java +++ b/src/main/java/org/xbib/marc/Marc.java @@ -125,12 +125,12 @@ public final class Marc { } /** - * Run XML stream parser over an XML input stream, with an XML event consumer. + * Run XML stream parser over an XML input stream with an XML event consumer. * @param xmlInputFactory the XML input factory * @param consumer the XML event consumer * @throws XMLStreamException if parsing fails */ - public void parseEvents(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException { + public void parse(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException { Objects.requireNonNull(consumer); if (builder.getMarcListeners() != null) { for (Map.Entry entry : builder.getMarcListeners().entrySet()) { @@ -144,6 +144,19 @@ public final class Marc { xmlEventReader.close(); } + public void parseNextRecord(XMLEventReader xmlEventReader, MarcXchangeEventConsumer consumer) throws XMLStreamException { + Objects.requireNonNull(consumer); + if (builder.getMarcListeners() != null) { + for (Map.Entry entry : builder.getMarcListeners().entrySet()) { + consumer.setMarcListener(entry.getKey(), entry.getValue()); + } + } + while (xmlEventReader.hasNext() && !consumer.isEndRecordReached()) { + consumer.add(xmlEventReader.nextEvent()); + } + consumer.resetEndRecordReached(); + } + public BufferedSeparatorInputStream iso2709Stream() { return iso2709Stream(DEFAULT_BUFFER_SIZE); } @@ -575,10 +588,6 @@ public final class Marc { builder.getMarcListener().endCollection(); } } - - public void parseRecords() throws IOException { - - } } /** @@ -1221,6 +1230,75 @@ public final class Marc { return this; } + public Iterator xmlRecordIterator() { + return xmlRecordIterator(new MarcXchangeEventConsumer()); + } + + public Iterator xmlRecordIterator(MarcXchangeEventConsumer consumer) { + XMLEventReader xmlEventReader; + try { + xmlEventReader = XMLInputFactory.newFactory().createXMLEventReader(inputStream); + } catch (XMLStreamException e) { + throw new IllegalStateException(e); + } + final MarcRecordAdapter marcRecordAdapter = new MarcRecordAdapter(new MarcRecordListener() { + @Override + public void beginCollection() { + } + + @Override + public void record(MarcRecord record) { + marcRecord = record; + } + + @Override + public void endCollection() { + } + }, Comparator.naturalOrder()); + consumer.setMarcListener(marcRecordAdapter); + return new Iterator<>() { + @Override + public boolean hasNext() { + try { + MarcRecord record; + record(null); + while (xmlEventReader.hasNext() && !consumer.isEndRecordReached()) { + consumer.add(xmlEventReader.nextEvent()); + } + consumer.resetEndRecordReached(); + record = getMarcRecord(); + if (record != null) { + return true; + } + } catch (XMLStreamException e) { + throw new IllegalStateException(e); + } + return false; + } + + @Override + public MarcRecord next() { + MarcRecord record = getMarcRecord(); + if (record == null) { + throw new NoSuchElementException(); + } + return record; + } + }; + } + + /** + * For easy {@code for} statements. + * @return iterable + */ + public Iterable xmlIterable() { + return this::xmlRecordIterator; + } + + public Stream xmlRecordStream() { + return StreamSupport.stream(xmlIterable().spliterator(), false); + } + private MarcRecord getMarcRecord() { return marcRecord; } diff --git a/src/main/java/org/xbib/marc/MarcRecordAdapter.java b/src/main/java/org/xbib/marc/MarcRecordAdapter.java index 585f1f0..3537043 100644 --- a/src/main/java/org/xbib/marc/MarcRecordAdapter.java +++ b/src/main/java/org/xbib/marc/MarcRecordAdapter.java @@ -18,6 +18,7 @@ package org.xbib.marc; import org.xbib.marc.label.RecordLabel; import java.util.Comparator; +import java.util.Objects; /** * The Marc record adapter collects Marc field events, collects them in a Marc builder, @@ -33,6 +34,7 @@ public class MarcRecordAdapter implements MarcListener { private final Comparator comparator; public MarcRecordAdapter(MarcRecordListener marcRecordListener, Comparator comparator) { + Objects.requireNonNull(marcRecordListener); this.marcRecordListener = marcRecordListener; this.builder = Marc.builder(); this.comparator = comparator; diff --git a/src/main/java/org/xbib/marc/xml/MarcXchangeEventConsumer.java b/src/main/java/org/xbib/marc/xml/MarcXchangeEventConsumer.java index 3df4181..91db10b 100644 --- a/src/main/java/org/xbib/marc/xml/MarcXchangeEventConsumer.java +++ b/src/main/java/org/xbib/marc/xml/MarcXchangeEventConsumer.java @@ -45,13 +45,13 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo private final Deque stack; - private final Map listeners; + private final Map marcListeners; + + private final StringBuilder content; private MarcValueTransformers marcValueTransformers; - private MarcListener listener; - - private final StringBuilder content; + private MarcListener marcListener; private String format; @@ -59,23 +59,26 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo private final Set validNamespaces; + private boolean endRecordReached; + public MarcXchangeEventConsumer() { this.stack = new LinkedList<>(); - this.listeners = new HashMap<>(); + this.marcListeners = new HashMap<>(); this.content = new StringBuilder(); this.format = MARC21_FORMAT; this.type = BIBLIOGRAPHIC_TYPE; this.validNamespaces = new HashSet<>(); this.validNamespaces.addAll(Set.of(MARCXCHANGE_V1_NS_URI, MARCXCHANGE_V2_NS_URI, MARC21_SCHEMA_URI)); + this.endRecordReached = false; } public MarcXchangeEventConsumer setMarcListener(String type, MarcListener listener) { - this.listeners.put(type, listener); + this.marcListeners.put(type, listener); return this; } public MarcXchangeEventConsumer setMarcListener(MarcListener listener) { - this.listeners.put(BIBLIOGRAPHIC_TYPE, listener); + this.marcListeners.put(BIBLIOGRAPHIC_TYPE, listener); return this; } @@ -91,37 +94,38 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo @Override public void beginCollection() { - if (listener != null) { - listener.beginCollection(); + if (marcListener != null) { + marcListener.beginCollection(); } } @Override public void endCollection() { - if (listener != null) { - listener.endCollection(); + if (marcListener != null) { + marcListener.endCollection(); } } @Override public void beginRecord(String format, String type) { - this.listener = listeners.get(type != null ? type : BIBLIOGRAPHIC_TYPE); - if (listener != null) { - listener.beginRecord(format, type); + this.marcListener = marcListeners.get(type != null ? type : BIBLIOGRAPHIC_TYPE); + if (marcListener != null) { + marcListener.beginRecord(format, type); } } @Override public void endRecord() { - if (listener != null) { - listener.endRecord(); + if (marcListener != null) { + marcListener.endRecord(); } + this.endRecordReached = true; } @Override public void leader(RecordLabel label) { - if (listener != null) { - listener.leader(label); + if (marcListener != null) { + marcListener.leader(label); } } @@ -131,8 +135,8 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo if (marcValueTransformers != null) { field = marcValueTransformers.transformValue(field); } - if (listener != null) { - listener.field(field); + if (marcListener != null) { + marcListener.field(field); } } @@ -141,7 +145,7 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo if (event.isStartElement()) { StartElement element = (StartElement) event; String uri = element.getName().getNamespaceURI(); - if (!isNamespace(uri)) { + if (!validNamespaces.contains(uri)) { return; } String localName = element.getName().getLocalPart(); @@ -191,69 +195,57 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo } content.setLength(0); switch (localName) { - case COLLECTION: { + case COLLECTION -> { beginCollection(); - break; } - case RECORD: { + case RECORD -> { setFormat(thisformat); setType(thistype); beginRecord(thisformat, thistype); - break; } - case LEADER: { - break; + case LEADER -> { } - case CONTROLFIELD: - case DATAFIELD: { + case CONTROLFIELD, DATAFIELD -> { MarcField.Builder builder = MarcField.builder().tag(tag); if (max > 0) { builder.indicator(sb.substring(min - 1, max)); } stack.push(builder); - break; } - case SUBFIELD: { + case SUBFIELD -> { stack.peek().subfield(code, null); - break; } - default: - break; + default -> { + } } } else if (event.isEndElement()) { EndElement element = (EndElement) event; String uri = element.getName().getNamespaceURI(); - if (!isNamespace(uri)) { + if (!validNamespaces.contains(uri)) { return; } String localName = element.getName().getLocalPart(); switch (localName) { - case COLLECTION: { + case COLLECTION -> { endCollection(); - break; } - case RECORD: { + case RECORD -> { endRecord(); - break; } - case LEADER: { + case LEADER -> { leader(RecordLabel.builder().from(content.toString().toCharArray()).build()); - break; } - case CONTROLFIELD: { + case CONTROLFIELD -> { field(transformValue(stack.pop().value(content.toString()).build())); - break; } - case DATAFIELD: { + case DATAFIELD -> { field(transformValue(stack.pop().build())); - break; } - case SUBFIELD: { + case SUBFIELD -> { stack.peek().subfieldValue(content.toString()); - break; } - default: - break; + default -> { + } } content.setLength(0); } else if (event.isCharacters()) { @@ -285,8 +277,12 @@ public class MarcXchangeEventConsumer implements XMLEventConsumer, MarcXchangeCo return this; } - private boolean isNamespace(String uri) { - return validNamespaces.contains(uri); + public boolean isEndRecordReached() { + return endRecordReached; + } + + public void resetEndRecordReached() { + endRecordReached = false; } private MarcField transformValue(MarcField field) { diff --git a/src/test/java/org/xbib/marc/xml/MarcEventConsumerTest.java b/src/test/java/org/xbib/marc/xml/MarcEventConsumerTest.java index b89a102..e0df370 100644 --- a/src/test/java/org/xbib/marc/xml/MarcEventConsumerTest.java +++ b/src/test/java/org/xbib/marc/xml/MarcEventConsumerTest.java @@ -16,13 +16,19 @@ package org.xbib.marc.xml; import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import org.junit.jupiter.api.Test; import org.xbib.marc.Marc; +import org.xbib.marc.MarcRecord; import org.xmlunit.matchers.CompareMatcher; import java.io.InputStream; import java.io.StringWriter; import java.nio.charset.StandardCharsets; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; +import java.util.logging.Logger; import javax.xml.stream.XMLInputFactory; /** @@ -30,6 +36,8 @@ import javax.xml.stream.XMLInputFactory; */ public class MarcEventConsumerTest { + private static final Logger logger = Logger.getLogger(MarcEventConsumerTest.class.getName()); + /** * Parsing XML by STAX (streaming XML) from Aleph publishing interface (hbz dialect). * @@ -58,7 +66,7 @@ public class MarcEventConsumerTest { .setFormat("AlephXML") .setType("Bibliographic") .build() - .parseEvents(XMLInputFactory.newFactory(), consumer); + .parse(XMLInputFactory.newFactory(), consumer); writer.endCollection(); writer.endDocument(); sw.close(); @@ -72,17 +80,51 @@ public class MarcEventConsumerTest { InputStream in = getClass().getResourceAsStream(s); MarcXchangeEventConsumer consumer = new MarcXchangeEventConsumer(); consumer.addNamespace("http://www.ddb.de/professionell/mabxml/mabxml-1.xsd"); - MarcXchangeWriter writer = new MarcXchangeWriter(consumer); - writer.setFormat("AlephXML").setType("Bibliographic"); - writer.startDocument(); - Marc.builder() + try (MarcXchangeWriter writer = new MarcXchangeWriter(consumer)) { + writer.setFormat("AlephXML").setType("Bibliographic"); + writer.startDocument(); + Marc.builder() + .setInputStream(in) + .setCharset(StandardCharsets.UTF_8) + .setFormat("AlephXML") + .setType("Bibliographic") + .build() + .writeCollection(); + writer.endDocument(); + assertNull(writer.getException()); + } + } + + @Test + public void testXmlIterable() { + String s = "chabon.mrc.xml"; + InputStream in = getClass().getResourceAsStream(s); + AtomicInteger count = new AtomicInteger(); + for (MarcRecord marcRecord : Marc.builder() .setInputStream(in) .setCharset(StandardCharsets.UTF_8) - .setFormat("AlephXML") - .setType("Bibliographic") - .build() - .writeCollection(); - writer.endDocument(); - assertNull(writer.getException()); + .xmlIterable()) { + logger.log(Level.INFO, marcRecord.toString()); + count.incrementAndGet(); + } + assertEquals(2, count.get()); + } + + @Test + public void testXmlIterator() { + String s = "HT016424175.xml"; + InputStream in = getClass().getResourceAsStream(s); + MarcXchangeEventConsumer consumer = new MarcXchangeEventConsumer(); + consumer.addNamespace("http://www.ddb.de/professionell/mabxml/mabxml-1.xsd"); + Iterator iterator = Marc.builder() + .setInputStream(in) + .setCharset(StandardCharsets.UTF_8) + .xmlRecordIterator(consumer); + AtomicInteger count = new AtomicInteger(); + while (iterator.hasNext()) { + logger.log(Level.INFO, iterator.next().toString()); + count.incrementAndGet(); + } + assertEquals(1, count.get()); } }