in MODS test, replace external XSL loading from loc.gov by internal XSL

This commit is contained in:
Jörg Prante 2022-11-07 22:29:56 +01:00
parent bcb4df4cce
commit 392a61dc12
4 changed files with 5279 additions and 131 deletions

View file

@ -1,7 +1,13 @@
pluginManagement {
repositories {
mavenLocal()
mavenCentral()
mavenCentral {
metadataSources {
mavenPom()
artifact()
ignoreGradleMetadataRedirection()
}
}
gradlePluginPortal()
}
}

View file

@ -45,163 +45,175 @@ public class MarcToModsTest {
@Test
public void testSax() throws Exception {
String s = "summerland.mrc";
InputStream marcInputStream = getClass().getResource(s).openStream();
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Source source = new SAXSource(marc.iso2709XmlReader(), new InputSource(marcInputStream));
StringWriter writer = new StringWriter();
StreamResult streamResult = new StreamResult(writer);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.transform(source, streamResult);
assertThat(writer.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-sax-marc.xml").openStream()));
try (InputStream marcInputStream = getClass().getResource(s).openStream()) {
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Source source = new SAXSource(marc.iso2709XmlReader(), new InputSource(marcInputStream));
StringWriter writer = new StringWriter();
StreamResult streamResult = new StreamResult(writer);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.transform(source, streamResult);
assertThat(writer.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-sax-marc.xml").openStream()));
}
}
@Test
public void testDom() throws Exception {
String s = "summerland.mrc";
InputStream marcInputStream = getClass().getResource(s).openStream();
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Sax2Dom sax2Dom = new Sax2Dom(marc.iso2709XmlReader(), new InputSource(marcInputStream));
Document document = sax2Dom.document();
StringWriter writer = new StringWriter();
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.transform(new DOMSource(document), new StreamResult(writer));
assertThat(writer.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-dom-marc.xml").openStream()));
try (InputStream marcInputStream = getClass().getResource(s).openStream()) {
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Sax2Dom sax2Dom = new Sax2Dom(marc.iso2709XmlReader(), new InputSource(marcInputStream));
Document document = sax2Dom.document();
StringWriter writer = new StringWriter();
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.transform(new DOMSource(document), new StreamResult(writer));
assertThat(writer.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-dom-marc.xml").openStream()));
}
}
@Test
public void testStylesheetSax() throws Exception {
String s = "summerland.mrc";
InputStream marcInputStream = getClass().getResource(s).openStream();
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Source source = new SAXSource(marc.iso2709XmlReader(), new InputSource(marcInputStream));
ByteArrayOutputStream out = new ByteArrayOutputStream();
StreamResult streamResult = new StreamResult(out);
InputStream xslInputStream = getClass().getResourceAsStream("MARC21slim2MODS3-6.xsl");
TransformerFactory factory = TransformerFactory.newInstance();
factory.setURIResolver(new ClasspathResourceURIResolver());
Transformer transformer = factory.newTemplates(new StreamSource(xslInputStream)).newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.setErrorListener(new ErrorListener() {
@Override
public void warning(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
try (InputStream marcInputStream = getClass().getResource(s).openStream()) {
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Source source = new SAXSource(marc.iso2709XmlReader(), new InputSource(marcInputStream));
ByteArrayOutputStream out = new ByteArrayOutputStream();
StreamResult streamResult = new StreamResult(out);
InputStream xslInputStream = getClass().getResourceAsStream("MARC21slim2MODS3-6.xsl");
TransformerFactory factory = TransformerFactory.newInstance();
// required for relative URI resolving in xsl:include
factory.setURIResolver(new ClasspathResourceURIResolver());
Transformer transformer = factory.newTemplates(new StreamSource(xslInputStream)).newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.setErrorListener(new ErrorListener() {
@Override
public void warning(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
@Override
public void error(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
@Override
public void error(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
@Override
public void fatalError(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
});
transformer.transform(source, streamResult);
marcInputStream.close();
xslInputStream.close();
assertThat(out.toByteArray(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-sax-mods.xml").openStream()));
@Override
public void fatalError(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
});
transformer.transform(source, streamResult);
marcInputStream.close();
xslInputStream.close();
assertThat(out.toByteArray(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-sax-mods.xml").openStream()));
}
}
@Test
public void testStylesheetDom() throws Exception {
String s = "summerland.mrc";
InputStream marcInputStream = getClass().getResource(s).openStream();
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Sax2Dom sax2Dom = new Sax2Dom(marc.iso2709XmlReader(), new InputSource(marcInputStream));
Document document = sax2Dom.document();
Source source = new DOMSource(document);
ByteArrayOutputStream out = new ByteArrayOutputStream();
StreamResult streamResult = new StreamResult(out);
InputStream xslInputStream = getClass().getResourceAsStream("MARC21slim2MODS3-6.xsl");
TransformerFactory factory = TransformerFactory.newInstance();
factory.setURIResolver(new ClasspathResourceURIResolver());
Transformer transformer = factory.newTemplates(new StreamSource(xslInputStream)).newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.setErrorListener(new ErrorListener() {
@Override
public void warning(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
try (InputStream marcInputStream = getClass().getResource(s).openStream()) {
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
Sax2Dom sax2Dom = new Sax2Dom(marc.iso2709XmlReader(), new InputSource(marcInputStream));
Document document = sax2Dom.document();
Source source = new DOMSource(document);
ByteArrayOutputStream out = new ByteArrayOutputStream();
StreamResult streamResult = new StreamResult(out);
InputStream xslInputStream = getClass().getResourceAsStream("MARC21slim2MODS3-6.xsl");
TransformerFactory factory = TransformerFactory.newInstance();
factory.setURIResolver(new ClasspathResourceURIResolver());
Transformer transformer = factory.newTemplates(new StreamSource(xslInputStream)).newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
transformer.setErrorListener(new ErrorListener() {
@Override
public void warning(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
@Override
public void error(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
@Override
public void error(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
@Override
public void fatalError(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
});
transformer.transform(source, streamResult);
marcInputStream.close();
xslInputStream.close();
assertThat(out.toByteArray(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-dom-mods.xml").openStream()));
@Override
public void fatalError(TransformerException exception) throws TransformerException {
exception.printStackTrace();
}
});
transformer.transform(source, streamResult);
marcInputStream.close();
xslInputStream.close();
assertThat(out.toByteArray(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-dom-mods.xml").openStream()));
}
}
/**
* With regard to sandboxed CI/CD platforms, we avoid loading external XSL style sheets from loc.gov.
* Instead, we run the test with a local copy of the style sheet.
*
* @throws Exception if test fails
*/
@Test
public void testLocStyleSheet() throws Exception {
public void testLocLocalCopyStyleSheet() throws Exception {
String s = "summerland.mrc";
InputStream marcInputStream = getClass().getResource(s).openStream();
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
StringWriter sw = new StringWriter();
Result result = new StreamResult(sw);
System.setProperty("http.agent", "Java Agent");
marc.transform(TransformerFactory.newInstance(),
new URL("http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl"), result);
assertThat(sw.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-mods-loc-goc.xml").openStream()));
try (InputStream marcInputStream = getClass().getResource(s).openStream()) {
Marc marc = Marc.builder()
.setInputStream(marcInputStream)
.setCharset(Charset.forName("ANSEL"))
.setSchema(MARC21_FORMAT)
.build();
StringWriter sw = new StringWriter();
Result result = new StreamResult(sw);
URL url = getClass().getResource("MARC21slim2MODS3.xsl");
TransformerFactory factory = TransformerFactory.newInstance();
factory.setURIResolver(new ClasspathResourceURIResolver());
marc.transform(factory, url, result);
assertThat(sw.toString(),
CompareMatcher.isIdenticalTo(getClass().getResource("summerland-mods-loc-goc-local-copy.xml").openStream()));
}
}
private static class ClasspathResourceURIResolver implements URIResolver {
@Override
public Source resolve(String href, String base) throws TransformerException {
public Source resolve(String href, String base) {
return new StreamSource(getClass().getResourceAsStream(href));
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?><mods xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd" version="3.4">
<titleInfo>
<title>Summerland</title>
</titleInfo>
<name type="personal" usage="primary">
<namePart>Chabon, Michael.</namePart>
</name>
<typeOfResource>text</typeOfResource>
<originInfo>
<place>
<placeTerm type="code" authority="marccountry">nyu</placeTerm>
</place>
<place>
<placeTerm type="text">New York</placeTerm>
</place>
<publisher>Miramax Books/Hyperion Books for Children</publisher>
<dateIssued>c2002</dateIssued>
<dateIssued encoding="marc">2002</dateIssued>
<edition>1st ed.</edition>
<issuance>monographic</issuance>
<frequency/>
</originInfo>
<language>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<physicalDescription>
<form authority="marcform">print</form>
<extent>500 p. ; 22 cm.</extent>
</physicalDescription>
<abstract>Ethan Feld, the worst baseball player in the history of the game, finds himself recruited by a 100-year-old scout to help a band of fairies triumph over an ancient enemy.</abstract>
<targetAudience authority="marctarget">juvenile</targetAudience>
<subject authority="lcshac">
<topic>Fantasy</topic>
</subject>
<subject authority="lcshac">
<topic>Baseball</topic>
<genre>Fiction</genre>
</subject>
<subject authority="lcshac">
<topic>Magic</topic>
<genre>Fiction</genre>
</subject>
<identifier type="isbn">0786808772</identifier>
<identifier type="isbn">0786816155 (pbk.)</identifier>
<recordInfo>
<descriptionStandard>aacr</descriptionStandard>
<recordContentSource authority="marcorg">DLC</recordContentSource>
<recordCreationDate encoding="marc">020805</recordCreationDate>
<recordChangeDate encoding="iso8601">20030616111422.0</recordChangeDate>
<recordIdentifier>12883376</recordIdentifier>
<recordOrigin>Converted from MARCXML to MODS version 3.4 using MARC21slim2MODS3-4.xsl
(Revision 1.70)</recordOrigin>
</recordInfo>
</mods>