diff --git a/src/test/java/org/xbib/marc/xml/BadRecordIdentifierTest.java b/src/test/java/org/xbib/marc/xml/BadRecordIdentifierTest.java
new file mode 100644
index 0000000..eb8ae26
--- /dev/null
+++ b/src/test/java/org/xbib/marc/xml/BadRecordIdentifierTest.java
@@ -0,0 +1,91 @@
+package org.xbib.marc.xml;
+
+import org.junit.jupiter.api.Test;
+import org.xbib.marc.Marc;
+import org.xbib.marc.MarcRecord;
+import org.xbib.marc.MarcRecordListener;
+import org.xbib.marc.transformer.value.MarcValueTransformer;
+import org.xbib.marc.transformer.value.MarcValueTransformers;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Parses the {@code badid.xml} fixture with a value transformer registered under
+ * {@code "001$$"} and checks that records are still delivered to the listener.
+ */
+public class BadRecordIdentifierTest {
+
+    private static final Logger logger = Logger.getLogger(BadRecordIdentifierTest.class.getName());
+
+    /**
+     * Reads {@code badid.xml} through the XML reader, with {@link #clean(String)} registered
+     * for {@code "001$$"}, and asserts that at least one record reached the listener.
+     *
+     * @throws Exception if opening, parsing or closing the resource fails
+     */
+    @Test
+    public void testBadRecordId() throws Exception {
+        AtomicBoolean found = new AtomicBoolean();
+        MarcRecordListener marcRecordListener = new MarcRecordListener() {
+            @Override
+            public void beginCollection() {
+            }
+
+            @Override
+            public void record(MarcRecord marcRecord) {
+                logger.log(Level.INFO, () -> "record = " + marcRecord);
+                found.set(true);
+            }
+
+            @Override
+            public void endCollection() {
+            }
+        };
+        MarcContentHandler marcListener = new MarcContentHandler();
+        marcListener.setMarcRecordListener(marcRecordListener);
+        MarcValueTransformer marcValueTransformer = BadRecordIdentifierTest::clean;
+        MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
+        marcValueTransformers.setMarcValueTransformer("001$$", marcValueTransformer);
+        // try-with-resources so the stream is closed even if parsing throws
+        try (InputStream in = getClass().getResourceAsStream("badid.xml")) {
+            Marc.builder()
+                    .setInputStream(in)
+                    .setCharset(StandardCharsets.UTF_8)
+                    .setContentHandler(marcListener)
+                    .setMarcValueTransformers(marcValueTransformers)
+                    .build()
+                    .xmlReader()
+                    .parse();
+        }
+        assertTrue(found.get());
+    }
+
+    /**
+     * Returns the leading run of characters of {@code string} whose code units lie in
+     * {@code [32, 127]}, cut off after 32 characters.
+     *
+     * @param string the raw value, may be {@code null}
+     * @return the sanitized prefix; empty when {@code string} is {@code null}
+     */
+    private static String clean(String string) {
+        StringBuilder sb = new StringBuilder();
+        if (string != null) {
+            for (char ch : string.toCharArray()) {
+                if (ch < 32 || ch > 127) {
+                    break;
+                }
+                if (sb.length() > 31) {
+                    break;
+                }
+                sb.append(ch);
+            }
+        }
+        return sb.toString();
+    }
+}
diff --git a/src/test/java/org/xbib/marc/xml/HbzfixXMLTest.java b/src/test/java/org/xbib/marc/xml/HbzfixXMLTest.java
new file mode 100644
index 0000000..eb27479
--- /dev/null
+++ b/src/test/java/org/xbib/marc/xml/HbzfixXMLTest.java
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2016-2022 Jörg Prante
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * Apache License 2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.xbib.marc.xml;
+
+import org.junit.jupiter.api.Test;
+import org.xbib.marc.Marc;
+import org.xbib.marc.MarcRecord;
+import org.xbib.marc.MarcRecordListener;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/** Parses the {@code hbzfix.xml} fixture and checks that records reach the record listener. */
+public class HbzfixXMLTest {
+    private static final Logger logger = Logger.getLogger(HbzfixXMLTest.class.getName());
+
+    /**
+     * Reads {@code hbzfix.xml} through the XML reader and asserts that a record was delivered.
+     * @throws Exception if opening, parsing or closing the resource fails
+     */
+    @Test
+    public void testHbzFixRecordListener() throws Exception {
+        AtomicBoolean found = new AtomicBoolean();
+        MarcRecordListener marcRecordListener = new MarcRecordListener() {
+            @Override
+            public void beginCollection() { }
+            @Override
+            public void record(MarcRecord marcRecord) {
+                logger.log(Level.INFO, () -> "record = " + marcRecord);
+                found.set(true);
+            }
+            @Override
+            public void endCollection() { }
+        };
+        MarcContentHandler marcListener = new MarcContentHandler();
+        marcListener.setMarcRecordListener(marcRecordListener);
+        // try-with-resources so the stream is closed even if parsing throws
+        try (InputStream in = getClass().getResourceAsStream("hbzfix.xml")) {
+            Marc.builder()
+                    .setInputStream(in)
+                    .setCharset(StandardCharsets.UTF_8)
+                    .setContentHandler(marcListener)
+                    .build()
+                    .xmlReader()
+                    .parse();
+        }
+        assertTrue(found.get());
+    }
+}
diff --git a/src/test/resources/org/xbib/marc/xml/badid.xml b/src/test/resources/org/xbib/marc/xml/badid.xml
new file mode 100644
index 0000000..f052cab
--- /dev/null
+++ b/src/test/resources/org/xbib/marc/xml/badid.xml
@@ -0,0 +1,76 @@
+ + + + 00000nam a2200000uu 4500 + (DE-447)EMARCHITECTS - BURST��003 North Haven . MASAHARU TAKASAKI - Tenchi House Nagoya . JYRKI TASA - House Moby Dick Espoo . CHRIS TATE Forest House Titirangi . TEZUKA ARCHITECTS - Pitched Roof House Nagano . TNA - Ring House Karuizawa, Nagano/ Mosaic House Tokyo . VO TRONG NGHIA - Stacking Green Ho Chi Minh City . 
WEBER l HUMMEL - Huse u Erlangen . WILLIAMS AND TSIEN - Shelter Island House Shelter Island . ZECC - Stairway to Heaven Utrecht . ZHANG LEI - Brick House 01 Nanjing/ Concrete Slit House Nanjing . PETER ZUMTHOR - Single-Family Hose Jenaz + DE-1020 + 20200401000000.0 + 200401s2014 gw |||| |||| 00||| ger|c + + 9783473327218 + + + 3473327212 + + + DE-1020 + ger + DE-605 + + + Weltraum + [mit ausklappbaren Entdecker-Seiten ; 8 - 12 Jahre] + [Text und Konzept: Stefan Greschik. Ill.: Jochen Windecker. Comic-Ill.: Billa Spiegelhauer. Fachl. Beratung: + Richard Bräucker] + + + Ravensburg + Ravensburger Buchverl. + 2014 + + + 54 S. + + + DE-1020 + NRW + + + + 00000nam a2200000uu 4500 + (DE-1020)E998535454 + DE-1020 + 20200401000000.0 + 200401s2014 gw |||| |||| 00||| ger|c + + 9783473328888 + + + 347332888X + + + DE-1020 + ger + DE-605 + + + Nieländer, Peter + + + Rund um den Fußball + [Ill. und Text: Peter Nieländer] + + + Ravensburg + Ravensburger Buchverl. + 2014 + + + [16] S. + + + DE-1020 + NRW + + + \ No newline at end of file diff --git a/src/test/resources/org/xbib/marc/xml/hbzfix.xml b/src/test/resources/org/xbib/marc/xml/hbzfix.xml new file mode 100644 index 0000000..53fd52c --- /dev/null +++ b/src/test/resources/org/xbib/marc/xml/hbzfix.xml @@ -0,0 +1,76 @@ + + + + 00000nam a2200000uu 4500 + (DE-1020)E998535450 + DE-1020 + 20200401000000.0 + 200401s2014 gw |||| |||| 00||| ger|c + + 9783473327218 + + + 3473327212 + + + DE-1020 + ger + DE-605 + + + Weltraum + [mit ausklappbaren Entdecker-Seiten ; 8 - 12 Jahre] + [Text und Konzept: Stefan Greschik. Ill.: Jochen Windecker. Comic-Ill.: Billa Spiegelhauer. Fachl. Beratung: +Richard Bräucker] + + + Ravensburg + Ravensburger Buchverl. + 2014 + + + 54 S. + + + DE-1020 + NRW + + + + 00000nam a2200000uu 4500 + (DE-1020)E998535454 + DE-1020 + 20200401000000.0 + 200401s2014 gw |||| |||| 00||| ger|c + + 9783473328888 + + + 347332888X + + + DE-1020 + ger + DE-605 + + + Nieländer, Peter + + + Rund um den Fußball + [Ill. 
und Text: Peter Nieländer] + + + Ravensburg + Ravensburger Buchverl. + 2014 + + + [16] S. + + + DE-1020 + NRW + + + \ No newline at end of file