move XML factories out of Marc processing to allow efficient reuse of XML parsers, fix NPE in subfield transforming

This commit is contained in:
Jörg Prante 2021-04-27 00:22:38 +02:00
parent 3fa26ade80
commit 5ed7a065d7
11 changed files with 45 additions and 41 deletions

View file

@ -1,4 +0,0 @@
language: java
jdk:
- openjdk11

View file

@ -1,9 +1,9 @@
group = org.xbib group = org.xbib
name = marc name = marc
version = 2.4.0 version = 2.5.0
gradle.wrapper.version = 6.6.1 gradle.wrapper.version = 6.6.1
xbib-content.version = 2.3.1 xbib-content.version = 2.6.4
xbib-bibliographic-character-sets.version = 2.0.0 xbib-bibliographic-character-sets.version = 2.0.0
xalan.version = 2.7.2 xalan.version = 2.7.2
xmlunit-matchers.version = 2.6.3 xmlunit-matchers.version = 2.6.3

View file

@ -1,5 +1,5 @@
def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.6.2' def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.7.0'
def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2' def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2'
dependencies { dependencies {
@ -11,7 +11,7 @@ dependencies {
test { test {
useJUnitPlatform() useJUnitPlatform()
failFast = true failFast = false
testLogging { testLogging {
events 'STARTED', 'PASSED', 'FAILED', 'SKIPPED' events 'STARTED', 'PASSED', 'FAILED', 'SKIPPED'
} }

View file

@ -1 +0,0 @@
rootProject.name = name

View file

@ -126,18 +126,18 @@ public final class Marc {
/** /**
* Run XML stream parser over an XML input stream, with an XML event consumer. * Run XML stream parser over an XML input stream, with an XML event consumer.
* @param xmlInputFactory the XML input factory
* @param consumer the XML event consumer * @param consumer the XML event consumer
* @throws XMLStreamException if parsing fails * @throws XMLStreamException if parsing fails
*/ */
public void parseEvents(MarcXchangeEventConsumer consumer) throws XMLStreamException { public void parseEvents(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException {
Objects.requireNonNull(consumer); Objects.requireNonNull(consumer);
if (builder.getMarcListeners() != null) { if (builder.getMarcListeners() != null) {
for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) { for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) {
consumer.setMarcListener(entry.getKey(), entry.getValue()); consumer.setMarcListener(entry.getKey(), entry.getValue());
} }
} }
XMLInputFactory inputFactory = XMLInputFactory.newInstance(); XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(builder.getInputStream());
XMLEventReader xmlEventReader = inputFactory.createXMLEventReader(builder.getInputStream());
while (xmlEventReader.hasNext()) { while (xmlEventReader.hasNext()) {
consumer.add(xmlEventReader.nextEvent()); consumer.add(xmlEventReader.nextEvent());
} }
@ -240,16 +240,6 @@ public final class Marc {
new InputSource(builder.getInputStream())).document(); new InputSource(builder.getInputStream())).document();
} }
/**
* Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet.
* @param stylesheetUrl the URL of the stylesheet
* @param result the result of the transformation
* @throws IOException if transformation fails
*/
public void transform(URL stylesheetUrl, Result result) throws IOException {
transform(TransformerFactory.newInstance(), stylesheetUrl, result);
}
/** /**
* Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet. * Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet.
* *
@ -440,6 +430,8 @@ public final class Marc {
protected final Builder builder; protected final Builder builder;
protected SAXParser saxParser;
private MarcXmlReader(Builder builder) { private MarcXmlReader(Builder builder) {
this.builder = builder; this.builder = builder;
} }
@ -504,6 +496,11 @@ public final class Marc {
return builder.getContentHandler(); return builder.getContentHandler();
} }
public MarcXmlReader setSaxParser(SAXParser saxParser) {
this.saxParser = saxParser;
return this;
}
/** /**
* Parse MARC XML via SAX. * Parse MARC XML via SAX.
* *
@ -524,21 +521,23 @@ public final class Marc {
builder.setContentHandler(handler); builder.setContentHandler(handler);
} }
try { try {
if (saxParser == null) {
SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true); factory.setNamespaceAware(true);
SAXParser sax = factory.newSAXParser(); saxParser = factory.newSAXParser();
}
if (builder.getFeatures() != null) { if (builder.getFeatures() != null) {
for (Map.Entry<String, Boolean> entry : builder.getFeatures().entrySet()) { for (Map.Entry<String, Boolean> entry : builder.getFeatures().entrySet()) {
sax.getXMLReader().setFeature(entry.getKey(), entry.getValue()); saxParser.getXMLReader().setFeature(entry.getKey(), entry.getValue());
} }
} }
if (builder.getProperties() != null) { if (builder.getProperties() != null) {
for (Map.Entry<String, Object> entry : builder.getProperties().entrySet()) { for (Map.Entry<String, Object> entry : builder.getProperties().entrySet()) {
sax.getXMLReader().setProperty(entry.getKey(), entry.getValue()); saxParser.getXMLReader().setProperty(entry.getKey(), entry.getValue());
} }
} }
sax.getXMLReader().setContentHandler(builder.getContentHandler()); saxParser.getXMLReader().setContentHandler(builder.getContentHandler());
sax.getXMLReader().parse(inputSource); saxParser.getXMLReader().parse(inputSource);
} catch (SAXException | ParserConfigurationException e) { } catch (SAXException | ParserConfigurationException e) {
throw new IOException(e); throw new IOException(e);
} finally { } finally {
@ -628,9 +627,9 @@ public final class Marc {
private InverseMarcContentHandler defaultContentHandler; private InverseMarcContentHandler defaultContentHandler;
private Map<String, Boolean> features = new HashMap<>(); private final Map<String, Boolean> features = new HashMap<>();
private Map<String, Object> properties = new HashMap<>(); private final Map<String, Object> properties = new HashMap<>();
private Map<String, MarcListener> listeners = new HashMap<>(); private Map<String, MarcListener> listeners = new HashMap<>();

View file

@ -362,9 +362,9 @@ public class MarcField implements Comparable<MarcField> {
private String value; private String value;
private LinkedList<Subfield> subfields; private final LinkedList<Subfield> subfields;
private LinkedList<String> subfieldIds; private final LinkedList<String> subfieldIds;
private Boolean isControl; private Boolean isControl;
@ -516,7 +516,10 @@ public class MarcField implements Comparable<MarcField> {
* @return this builder * @return this builder
*/ */
public Builder subfieldValue(String value) { public Builder subfieldValue(String value) {
subfields.add(new Subfield(subfields.removeLast().getId(), value)); if (!subfields.isEmpty()) {
String id = subfields.removeLast().getId();
subfields.add(new Subfield(id, value));
}
return this; return this;
} }
@ -590,7 +593,8 @@ public class MarcField implements Comparable<MarcField> {
this.position = field.getPosition(); this.position = field.getPosition();
this.length = field.getLength(); this.length = field.getLength();
this.value = field.getValue(); this.value = field.getValue();
this.subfields = new LinkedList<>(field.getSubfields()); this.subfields.clear();
this.subfields.addAll(field.getSubfields());
for (Subfield subfield : subfields) { for (Subfield subfield : subfields) {
subfieldIds.add(subfield.getId()); subfieldIds.add(subfield.getId());
} }

View file

@ -29,6 +29,7 @@ import java.util.Objects;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamResult;
/** /**
@ -110,7 +111,8 @@ public class MarcTool {
.setMarcListener(writer); .setMarcListener(writer);
if (schema != null && stylesheet != null && result != null) { if (schema != null && stylesheet != null && result != null) {
System.setProperty("http.agent", "Java Agent"); System.setProperty("http.agent", "Java Agent");
builder.setSchema(schema).build().transform(new URL(stylesheet), builder.setSchema(schema).build().transform(TransformerFactory.newInstance(),
new URL(stylesheet),
new StreamResult(Files.newBufferedWriter(Paths.get(result)))); new StreamResult(Files.newBufferedWriter(Paths.get(result))));
} else { } else {
builder.build().writeCollection(65536); builder.build().writeCollection(65536);

View file

@ -31,6 +31,7 @@ public class MarcFieldTransformers extends LinkedList<MarcFieldTransformer> {
LinkedList<MarcField> list = new LinkedList<>(); LinkedList<MarcField> list = new LinkedList<>();
// lastBuilt allows us to access the last MarcField built across different MarcFieldTransformer // lastBuilt allows us to access the last MarcField built across different MarcFieldTransformer
MarcField lastBuilt = null; MarcField lastBuilt = null;
// critical area - marc fields must not change here - so we cannot reuse transformers across multiple threads
for (MarcField marcField : marcFields) { for (MarcField marcField : marcFields) {
boolean found = false; boolean found = false;
String key; String key;

View file

@ -180,13 +180,14 @@ public class MarcToModsTest {
StringWriter sw = new StringWriter(); StringWriter sw = new StringWriter();
Result result = new StreamResult(sw); Result result = new StreamResult(sw);
System.setProperty("http.agent", "Java Agent"); System.setProperty("http.agent", "Java Agent");
marc.transform(new URL("http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl"), result); marc.transform(TransformerFactory.newInstance(),
new URL("http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl"), result);
assertThat(sw.toString(), assertThat(sw.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-mods-loc-goc.xml").openStream())); CompareMatcher.isIdenticalTo(getClass().getResource("summerland-mods-loc-goc.xml").openStream()));
} }
private class ClasspathResourceURIResolver implements URIResolver { private static class ClasspathResourceURIResolver implements URIResolver {
@Override @Override
public Source resolve(String href, String base) throws TransformerException { public Source resolve(String href, String base) throws TransformerException {
return new StreamSource(getClass().getResourceAsStream(href)); return new StreamSource(getClass().getResourceAsStream(href));

View file

@ -1,8 +1,9 @@
package org.xbib.marc.tools; package org.xbib.marc.tools;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
@Disabled("system exit ends testing?")
public class ToolTest { public class ToolTest {
@Test @Test

View file

@ -8,6 +8,7 @@ import org.xmlunit.matchers.CompareMatcher;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringWriter; import java.io.StringWriter;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import javax.xml.stream.XMLInputFactory;
/** /**
* *
@ -44,7 +45,7 @@ public class MarcEventConsumerTest {
.setFormat("AlephXML") .setFormat("AlephXML")
.setType("Bibliographic") .setType("Bibliographic")
.build() .build()
.parseEvents(consumer); .parseEvents(XMLInputFactory.newFactory(), consumer);
writer.endCollection(); writer.endCollection();
writer.endDocument(); writer.endDocument();
sw.close(); sw.close();