move XML factories out of Marc processing to allow efficient reuse of XML parsers, fix NPE in subfield transforming

This commit is contained in:
Jörg Prante 2021-04-27 00:22:38 +02:00
parent 3fa26ade80
commit 5ed7a065d7
11 changed files with 45 additions and 41 deletions

View file

@ -1,4 +0,0 @@
language: java
jdk:
- openjdk11

View file

@ -1,9 +1,9 @@
group = org.xbib
name = marc
version = 2.4.0
version = 2.5.0
gradle.wrapper.version = 6.6.1
xbib-content.version = 2.3.1
xbib-content.version = 2.6.4
xbib-bibliographic-character-sets.version = 2.0.0
xalan.version = 2.7.2
xmlunit-matchers.version = 2.6.3

View file

@ -1,5 +1,5 @@
def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.6.2'
def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.7.0'
def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2'
dependencies {
@ -11,7 +11,7 @@ dependencies {
test {
useJUnitPlatform()
failFast = true
failFast = false
testLogging {
events 'STARTED', 'PASSED', 'FAILED', 'SKIPPED'
}

View file

@ -1 +0,0 @@
rootProject.name = name

View file

@ -126,18 +126,18 @@ public final class Marc {
/**
* Run XML stream parser over an XML input stream, with an XML event consumer.
* @param xmlInputFactory the XML input factory
* @param consumer the XML event consumer
* @throws XMLStreamException if parsing fails
*/
public void parseEvents(MarcXchangeEventConsumer consumer) throws XMLStreamException {
public void parseEvents(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException {
Objects.requireNonNull(consumer);
if (builder.getMarcListeners() != null) {
for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) {
consumer.setMarcListener(entry.getKey(), entry.getValue());
}
}
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
XMLEventReader xmlEventReader = inputFactory.createXMLEventReader(builder.getInputStream());
XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(builder.getInputStream());
while (xmlEventReader.hasNext()) {
consumer.add(xmlEventReader.nextEvent());
}
@ -240,16 +240,6 @@ public final class Marc {
new InputSource(builder.getInputStream())).document();
}
/**
* Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet.
* @param stylesheetUrl the URL of the stylesheet
* @param result the result of the transformation
* @throws IOException if transformation fails
*/
public void transform(URL stylesheetUrl, Result result) throws IOException {
transform(TransformerFactory.newInstance(), stylesheetUrl, result);
}
/**
* Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet.
*
@ -440,6 +430,8 @@ public final class Marc {
protected final Builder builder;
protected SAXParser saxParser;
/**
 * Create a reader that keeps a reference to the given builder.
 *
 * @param builder the builder stored on this reader
 */
private MarcXmlReader(Builder builder) {
this.builder = builder;
}
@ -504,6 +496,11 @@ public final class Marc {
return builder.getContentHandler();
}
/**
 * Set the SAX parser held by this reader, so that callers can supply
 * (and reuse) their own parser instance.
 * NOTE(review): the parse code visible in this change appears to create a
 * namespace-aware parser on its own only when this field is still null - confirm.
 *
 * @param saxParser the SAX parser to store on this reader
 * @return this reader, to allow call chaining
 */
public MarcXmlReader setSaxParser(SAXParser saxParser) {
this.saxParser = saxParser;
return this;
}
/**
* Parse MARC XML via SAX.
*
@ -524,21 +521,23 @@ public final class Marc {
builder.setContentHandler(handler);
}
try {
if (saxParser == null) {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
SAXParser sax = factory.newSAXParser();
saxParser = factory.newSAXParser();
}
if (builder.getFeatures() != null) {
for (Map.Entry<String, Boolean> entry : builder.getFeatures().entrySet()) {
sax.getXMLReader().setFeature(entry.getKey(), entry.getValue());
saxParser.getXMLReader().setFeature(entry.getKey(), entry.getValue());
}
}
if (builder.getProperties() != null) {
for (Map.Entry<String, Object> entry : builder.getProperties().entrySet()) {
sax.getXMLReader().setProperty(entry.getKey(), entry.getValue());
saxParser.getXMLReader().setProperty(entry.getKey(), entry.getValue());
}
}
sax.getXMLReader().setContentHandler(builder.getContentHandler());
sax.getXMLReader().parse(inputSource);
saxParser.getXMLReader().setContentHandler(builder.getContentHandler());
saxParser.getXMLReader().parse(inputSource);
} catch (SAXException | ParserConfigurationException e) {
throw new IOException(e);
} finally {
@ -628,9 +627,9 @@ public final class Marc {
private InverseMarcContentHandler defaultContentHandler;
private Map<String, Boolean> features = new HashMap<>();
private final Map<String, Boolean> features = new HashMap<>();
private Map<String, Object> properties = new HashMap<>();
private final Map<String, Object> properties = new HashMap<>();
private Map<String, MarcListener> listeners = new HashMap<>();

View file

@ -362,9 +362,9 @@ public class MarcField implements Comparable<MarcField> {
private String value;
private LinkedList<Subfield> subfields;
private final LinkedList<Subfield> subfields;
private LinkedList<String> subfieldIds;
private final LinkedList<String> subfieldIds;
private Boolean isControl;
@ -516,7 +516,10 @@ public class MarcField implements Comparable<MarcField> {
* @return this builder
*/
public Builder subfieldValue(String value) {
subfields.add(new Subfield(subfields.removeLast().getId(), value));
if (!subfields.isEmpty()) {
String id = subfields.removeLast().getId();
subfields.add(new Subfield(id, value));
}
return this;
}
@ -590,7 +593,8 @@ public class MarcField implements Comparable<MarcField> {
this.position = field.getPosition();
this.length = field.getLength();
this.value = field.getValue();
this.subfields = new LinkedList<>(field.getSubfields());
this.subfields.clear();
this.subfields.addAll(field.getSubfields());
for (Subfield subfield : subfields) {
subfieldIds.add(subfield.getId());
}

View file

@ -29,6 +29,7 @@ import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
/**
@ -110,7 +111,8 @@ public class MarcTool {
.setMarcListener(writer);
if (schema != null && stylesheet != null && result != null) {
System.setProperty("http.agent", "Java Agent");
builder.setSchema(schema).build().transform(new URL(stylesheet),
builder.setSchema(schema).build().transform(TransformerFactory.newInstance(),
new URL(stylesheet),
new StreamResult(Files.newBufferedWriter(Paths.get(result))));
} else {
builder.build().writeCollection(65536);

View file

@ -31,6 +31,7 @@ public class MarcFieldTransformers extends LinkedList<MarcFieldTransformer> {
LinkedList<MarcField> list = new LinkedList<>();
// lastBuilt allows us to access the last MarcField built across different MarcFieldTransformer
MarcField lastBuilt = null;
// critical area - marc fields must not change here - so transformers cannot be shared between multiple threads
for (MarcField marcField : marcFields) {
boolean found = false;
String key;

View file

@ -180,13 +180,14 @@ public class MarcToModsTest {
StringWriter sw = new StringWriter();
Result result = new StreamResult(sw);
System.setProperty("http.agent", "Java Agent");
marc.transform(new URL("http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl"), result);
marc.transform(TransformerFactory.newInstance(),
new URL("http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl"), result);
assertThat(sw.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-mods-loc-goc.xml").openStream()));
}
private class ClasspathResourceURIResolver implements URIResolver {
private static class ClasspathResourceURIResolver implements URIResolver {
@Override
public Source resolve(String href, String base) throws TransformerException {
return new StreamSource(getClass().getResourceAsStream(href));

View file

@ -1,8 +1,9 @@
package org.xbib.marc.tools;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
@Disabled("system exit ends testing?")
public class ToolTest {
@Test

View file

@ -8,6 +8,7 @@ import org.xmlunit.matchers.CompareMatcher;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import javax.xml.stream.XMLInputFactory;
/**
*
@ -44,7 +45,7 @@ public class MarcEventConsumerTest {
.setFormat("AlephXML")
.setType("Bibliographic")
.build()
.parseEvents(consumer);
.parseEvents(XMLInputFactory.newFactory(), consumer);
writer.endCollection();
writer.endDocument();
sw.close();