move XML factories out of Marc processing to allow efficient reuse of XML parsers, fix NPE in subfield transforming
This commit is contained in:
parent
3fa26ade80
commit
5ed7a065d7
11 changed files with 45 additions and 41 deletions
|
@ -1,4 +0,0 @@
|
||||||
language: java
|
|
||||||
jdk:
|
|
||||||
- openjdk11
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
group = org.xbib
|
group = org.xbib
|
||||||
name = marc
|
name = marc
|
||||||
version = 2.4.0
|
version = 2.5.0
|
||||||
|
|
||||||
gradle.wrapper.version = 6.6.1
|
gradle.wrapper.version = 6.6.1
|
||||||
xbib-content.version = 2.3.1
|
xbib-content.version = 2.6.4
|
||||||
xbib-bibliographic-character-sets.version = 2.0.0
|
xbib-bibliographic-character-sets.version = 2.0.0
|
||||||
xalan.version = 2.7.2
|
xalan.version = 2.7.2
|
||||||
xmlunit-matchers.version = 2.6.3
|
xmlunit-matchers.version = 2.6.3
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.6.2'
|
def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.7.0'
|
||||||
def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2'
|
def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2'
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
|
@ -11,7 +11,7 @@ dependencies {
|
||||||
|
|
||||||
test {
|
test {
|
||||||
useJUnitPlatform()
|
useJUnitPlatform()
|
||||||
failFast = true
|
failFast = false
|
||||||
testLogging {
|
testLogging {
|
||||||
events 'STARTED', 'PASSED', 'FAILED', 'SKIPPED'
|
events 'STARTED', 'PASSED', 'FAILED', 'SKIPPED'
|
||||||
}
|
}
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
rootProject.name = name
|
|
|
@ -126,18 +126,18 @@ public final class Marc {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run XML stream parser over an XML input stream, with an XML event consumer.
|
* Run XML stream parser over an XML input stream, with an XML event consumer.
|
||||||
|
* @param xmlInputFactory the XML input factory
|
||||||
* @param consumer the XML event consumer
|
* @param consumer the XML event consumer
|
||||||
* @throws XMLStreamException if parsing fails
|
* @throws XMLStreamException if parsing fails
|
||||||
*/
|
*/
|
||||||
public void parseEvents(MarcXchangeEventConsumer consumer) throws XMLStreamException {
|
public void parseEvents(XMLInputFactory xmlInputFactory, MarcXchangeEventConsumer consumer) throws XMLStreamException {
|
||||||
Objects.requireNonNull(consumer);
|
Objects.requireNonNull(consumer);
|
||||||
if (builder.getMarcListeners() != null) {
|
if (builder.getMarcListeners() != null) {
|
||||||
for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) {
|
for (Map.Entry<String, MarcListener> entry : builder.getMarcListeners().entrySet()) {
|
||||||
consumer.setMarcListener(entry.getKey(), entry.getValue());
|
consumer.setMarcListener(entry.getKey(), entry.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
|
XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(builder.getInputStream());
|
||||||
XMLEventReader xmlEventReader = inputFactory.createXMLEventReader(builder.getInputStream());
|
|
||||||
while (xmlEventReader.hasNext()) {
|
while (xmlEventReader.hasNext()) {
|
||||||
consumer.add(xmlEventReader.nextEvent());
|
consumer.add(xmlEventReader.nextEvent());
|
||||||
}
|
}
|
||||||
|
@ -240,16 +240,6 @@ public final class Marc {
|
||||||
new InputSource(builder.getInputStream())).document();
|
new InputSource(builder.getInputStream())).document();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet.
|
|
||||||
* @param stylesheetUrl the URL of the stylesheet
|
|
||||||
* @param result the result of the transformation
|
|
||||||
* @throws IOException if transformation fails
|
|
||||||
*/
|
|
||||||
public void transform(URL stylesheetUrl, Result result) throws IOException {
|
|
||||||
transform(TransformerFactory.newInstance(), stylesheetUrl, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet.
|
* Transform W3C document of the record in the ISO 2709 input stream by an XSL stylesheet.
|
||||||
*
|
*
|
||||||
|
@ -440,6 +430,8 @@ public final class Marc {
|
||||||
|
|
||||||
protected final Builder builder;
|
protected final Builder builder;
|
||||||
|
|
||||||
|
protected SAXParser saxParser;
|
||||||
|
|
||||||
private MarcXmlReader(Builder builder) {
|
private MarcXmlReader(Builder builder) {
|
||||||
this.builder = builder;
|
this.builder = builder;
|
||||||
}
|
}
|
||||||
|
@ -504,6 +496,11 @@ public final class Marc {
|
||||||
return builder.getContentHandler();
|
return builder.getContentHandler();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public MarcXmlReader setSaxParser(SAXParser saxParser) {
|
||||||
|
this.saxParser = saxParser;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse MARC XML via SAX.
|
* Parse MARC XML via SAX.
|
||||||
*
|
*
|
||||||
|
@ -524,21 +521,23 @@ public final class Marc {
|
||||||
builder.setContentHandler(handler);
|
builder.setContentHandler(handler);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
SAXParserFactory factory = SAXParserFactory.newInstance();
|
if (saxParser == null) {
|
||||||
factory.setNamespaceAware(true);
|
SAXParserFactory factory = SAXParserFactory.newInstance();
|
||||||
SAXParser sax = factory.newSAXParser();
|
factory.setNamespaceAware(true);
|
||||||
|
saxParser = factory.newSAXParser();
|
||||||
|
}
|
||||||
if (builder.getFeatures() != null) {
|
if (builder.getFeatures() != null) {
|
||||||
for (Map.Entry<String, Boolean> entry : builder.getFeatures().entrySet()) {
|
for (Map.Entry<String, Boolean> entry : builder.getFeatures().entrySet()) {
|
||||||
sax.getXMLReader().setFeature(entry.getKey(), entry.getValue());
|
saxParser.getXMLReader().setFeature(entry.getKey(), entry.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (builder.getProperties() != null) {
|
if (builder.getProperties() != null) {
|
||||||
for (Map.Entry<String, Object> entry : builder.getProperties().entrySet()) {
|
for (Map.Entry<String, Object> entry : builder.getProperties().entrySet()) {
|
||||||
sax.getXMLReader().setProperty(entry.getKey(), entry.getValue());
|
saxParser.getXMLReader().setProperty(entry.getKey(), entry.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sax.getXMLReader().setContentHandler(builder.getContentHandler());
|
saxParser.getXMLReader().setContentHandler(builder.getContentHandler());
|
||||||
sax.getXMLReader().parse(inputSource);
|
saxParser.getXMLReader().parse(inputSource);
|
||||||
} catch (SAXException | ParserConfigurationException e) {
|
} catch (SAXException | ParserConfigurationException e) {
|
||||||
throw new IOException(e);
|
throw new IOException(e);
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -628,9 +627,9 @@ public final class Marc {
|
||||||
|
|
||||||
private InverseMarcContentHandler defaultContentHandler;
|
private InverseMarcContentHandler defaultContentHandler;
|
||||||
|
|
||||||
private Map<String, Boolean> features = new HashMap<>();
|
private final Map<String, Boolean> features = new HashMap<>();
|
||||||
|
|
||||||
private Map<String, Object> properties = new HashMap<>();
|
private final Map<String, Object> properties = new HashMap<>();
|
||||||
|
|
||||||
private Map<String, MarcListener> listeners = new HashMap<>();
|
private Map<String, MarcListener> listeners = new HashMap<>();
|
||||||
|
|
||||||
|
|
|
@ -362,9 +362,9 @@ public class MarcField implements Comparable<MarcField> {
|
||||||
|
|
||||||
private String value;
|
private String value;
|
||||||
|
|
||||||
private LinkedList<Subfield> subfields;
|
private final LinkedList<Subfield> subfields;
|
||||||
|
|
||||||
private LinkedList<String> subfieldIds;
|
private final LinkedList<String> subfieldIds;
|
||||||
|
|
||||||
private Boolean isControl;
|
private Boolean isControl;
|
||||||
|
|
||||||
|
@ -516,7 +516,10 @@ public class MarcField implements Comparable<MarcField> {
|
||||||
* @return this builder
|
* @return this builder
|
||||||
*/
|
*/
|
||||||
public Builder subfieldValue(String value) {
|
public Builder subfieldValue(String value) {
|
||||||
subfields.add(new Subfield(subfields.removeLast().getId(), value));
|
if (!subfields.isEmpty()) {
|
||||||
|
String id = subfields.removeLast().getId();
|
||||||
|
subfields.add(new Subfield(id, value));
|
||||||
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -590,7 +593,8 @@ public class MarcField implements Comparable<MarcField> {
|
||||||
this.position = field.getPosition();
|
this.position = field.getPosition();
|
||||||
this.length = field.getLength();
|
this.length = field.getLength();
|
||||||
this.value = field.getValue();
|
this.value = field.getValue();
|
||||||
this.subfields = new LinkedList<>(field.getSubfields());
|
this.subfields.clear();
|
||||||
|
this.subfields.addAll(field.getSubfields());
|
||||||
for (Subfield subfield : subfields) {
|
for (Subfield subfield : subfields) {
|
||||||
subfieldIds.add(subfield.getId());
|
subfieldIds.add(subfield.getId());
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,6 +29,7 @@ import java.util.Objects;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import java.util.logging.Logger;
|
import java.util.logging.Logger;
|
||||||
|
|
||||||
|
import javax.xml.transform.TransformerFactory;
|
||||||
import javax.xml.transform.stream.StreamResult;
|
import javax.xml.transform.stream.StreamResult;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -110,8 +111,9 @@ public class MarcTool {
|
||||||
.setMarcListener(writer);
|
.setMarcListener(writer);
|
||||||
if (schema != null && stylesheet != null && result != null) {
|
if (schema != null && stylesheet != null && result != null) {
|
||||||
System.setProperty("http.agent", "Java Agent");
|
System.setProperty("http.agent", "Java Agent");
|
||||||
builder.setSchema(schema).build().transform(new URL(stylesheet),
|
builder.setSchema(schema).build().transform(TransformerFactory.newInstance(),
|
||||||
new StreamResult(Files.newBufferedWriter(Paths.get(result))));
|
new URL(stylesheet),
|
||||||
|
new StreamResult(Files.newBufferedWriter(Paths.get(result))));
|
||||||
} else {
|
} else {
|
||||||
builder.build().writeCollection(65536);
|
builder.build().writeCollection(65536);
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,7 @@ public class MarcFieldTransformers extends LinkedList<MarcFieldTransformer> {
|
||||||
LinkedList<MarcField> list = new LinkedList<>();
|
LinkedList<MarcField> list = new LinkedList<>();
|
||||||
// lastBuilt allows us to access the last MarcField built across different MarcFieldTransformer
|
// lastBuilt allows us to access the last MarcField built across different MarcFieldTransformer
|
||||||
MarcField lastBuilt = null;
|
MarcField lastBuilt = null;
|
||||||
|
// critical area - marc fields must not change here - so transformers can not be reused by multiple threads
|
||||||
for (MarcField marcField : marcFields) {
|
for (MarcField marcField : marcFields) {
|
||||||
boolean found = false;
|
boolean found = false;
|
||||||
String key;
|
String key;
|
||||||
|
|
|
@ -180,13 +180,14 @@ public class MarcToModsTest {
|
||||||
StringWriter sw = new StringWriter();
|
StringWriter sw = new StringWriter();
|
||||||
Result result = new StreamResult(sw);
|
Result result = new StreamResult(sw);
|
||||||
System.setProperty("http.agent", "Java Agent");
|
System.setProperty("http.agent", "Java Agent");
|
||||||
marc.transform(new URL("http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl"), result);
|
marc.transform(TransformerFactory.newInstance(),
|
||||||
|
new URL("http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl"), result);
|
||||||
assertThat(sw.toString(),
|
assertThat(sw.toString(),
|
||||||
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-mods-loc-goc.xml").openStream()));
|
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-mods-loc-goc.xml").openStream()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private class ClasspathResourceURIResolver implements URIResolver {
|
private static class ClasspathResourceURIResolver implements URIResolver {
|
||||||
@Override
|
@Override
|
||||||
public Source resolve(String href, String base) throws TransformerException {
|
public Source resolve(String href, String base) throws TransformerException {
|
||||||
return new StreamSource(getClass().getResourceAsStream(href));
|
return new StreamSource(getClass().getResourceAsStream(href));
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
package org.xbib.marc.tools;
|
package org.xbib.marc.tools;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.junit.jupiter.params.ParameterizedTest;
|
|
||||||
|
|
||||||
|
@Disabled("system exit ends testing?")
|
||||||
public class ToolTest {
|
public class ToolTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -8,6 +8,7 @@ import org.xmlunit.matchers.CompareMatcher;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import javax.xml.stream.XMLInputFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -44,7 +45,7 @@ public class MarcEventConsumerTest {
|
||||||
.setFormat("AlephXML")
|
.setFormat("AlephXML")
|
||||||
.setType("Bibliographic")
|
.setType("Bibliographic")
|
||||||
.build()
|
.build()
|
||||||
.parseEvents(consumer);
|
.parseEvents(XMLInputFactory.newFactory(), consumer);
|
||||||
writer.endCollection();
|
writer.endCollection();
|
||||||
writer.endDocument();
|
writer.endDocument();
|
||||||
sw.close();
|
sw.close();
|
||||||
|
|
Loading…
Reference in a new issue