From d73164601d03c02933ad2b052d2fb4a211ac8a6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=CC=88rg=20Prante?= Date: Thu, 9 Mar 2017 10:18:01 +0100 Subject: [PATCH] add ISSN collector by record identifier --- ...MarcFieldFilterByRecordIdentifierTest.java | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 src/test/java/org/xbib/marc/filter/MarcFieldFilterByRecordIdentifierTest.java diff --git a/src/test/java/org/xbib/marc/filter/MarcFieldFilterByRecordIdentifierTest.java b/src/test/java/org/xbib/marc/filter/MarcFieldFilterByRecordIdentifierTest.java new file mode 100644 index 0000000..74f55f0 --- /dev/null +++ b/src/test/java/org/xbib/marc/filter/MarcFieldFilterByRecordIdentifierTest.java @@ -0,0 +1,106 @@ +package org.xbib.marc.filter; + +import org.junit.Test; +import org.xbib.content.XContentBuilder; +import org.xbib.marc.Marc; +import org.xbib.marc.MarcField; +import org.xbib.marc.MarcFieldAdapter; +import org.xbib.marc.MarcListener; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static org.xbib.content.json.JsonXContent.contentBuilder; + +/** + * Demo of collecting ISSNs by record identifier from a MARC file. 
+ *
+ * "issns.mrc" courtesy of Steven Hirren (steven.hirren.gmail.com)
+ */
+public class MarcFieldFilterByRecordIdentifierTest {
+
+    private static final Logger logger = Logger.getLogger(MarcFieldFilterByRecordIdentifierTest.class.getName());
+
+    /**
+     * Reads the "issns.mrc" resource through a {@link MarcListener}, keeps the subfields
+     * accepted by {@code matchISSNField} grouped by record identifier and field tag,
+     * derives the distinct subfield values across all records, and logs everything as
+     * pretty-printed JSON at INFO level.
+     *
+     * NOTE(review): this test contains no assertions; it can only fail by throwing.
+     * NOTE(review): the generic type arguments of several declarations below look
+     * truncated in this text (for example {@code Map>>>}) - confirm against the
+     * original commit before applying.
+     *
+     * @throws IOException declared by this method; presumably raised while reading the
+     *         resource or building the JSON - confirm against the Marc/XContentBuilder APIs
+     */
+    @Test
+    public void findISSNs() throws IOException {
+        // record identifier -> (field tag -> list of single-entry {subfield id: subfield value} maps);
+        // TreeMap keeps the record identifiers in sorted order
+        Map>>> result = new TreeMap<>();
+        // set up MARC listener
+        MarcListener marcListener = new MarcFieldAdapter() {
+            // state of the record currently being received; both stay null until a "001" field arrives
+            String recordIdentifier;
+            Map>> fields;
+            @Override
+            public void field(MarcField field) {
+                // a field tagged "001" supplies the record identifier and starts a fresh per-record map
+                if ("001".equals(field.getTag())) {
+                    recordIdentifier = field.getValue();
+                    fields = new TreeMap<>();
+                }
+                // keep only the subfields accepted by matchISSNField, each wrapped as {id: value}
+                Collection> values = field.getSubfields().stream()
+                        .filter(f -> matchISSNField(field, f))
+                        .map(f -> Collections.singletonMap(f.getId(), f.getValue()))
+                        .collect(Collectors.toList());
+                if (!values.isEmpty()) {
+                    // NOTE(review): 'fields' is still null here if a matching field is delivered
+                    // before any "001" field, which would throw a NullPointerException
+                    fields.putIfAbsent(field.getTag(), new ArrayList<>());
+                    List> list = fields.get(field.getTag());
+                    list.addAll(values);
+                    // NOTE(review): re-stores the same list instance that is already mapped; appears redundant
+                    fields.put(field.getTag(), list);
+                }
+            }
+            @Override
+            public void endRecord() {
+                // publish what was accumulated for this record under its identifier
+                // NOTE(review): a record without a "001" field would reuse the previous record's
+                // identifier and map (or a null key for the very first record) - presumably
+                // every record in the test file has one; confirm
+                result.put(recordIdentifier, fields);
+            }
+        };
+        // read MARC file
+        // NOTE(review): the stream returned by openStream() is not closed in this method - confirm
+        // whether the Marc pipeline closes it; getResource(...) returns null when the resource is
+        // missing, which would surface here as a NullPointerException
+        Marc.builder()
+                .setInputStream(getClass().getResource("issns.mrc").openStream())
+                .setMarcListener(marcListener)
+                .build()
+                .writeCollection(); // presumably drives parsing and the listener callbacks - confirm
+
+        // collect all ISSNs from all records and all fields and make them unique
+        // steps: per-record maps -> per-tag lists -> single-entry maps -> their values -> distinct
+        // NOTE(review): the two collect(...).stream() round-trips only materialize a list to stream it again
+        List> issns =
+                result.values().stream()
+                        .map(Map::values)
+                        .flatMap(Collection::stream)
+                        .collect(Collectors.toList()).stream()
+                        .flatMap(Collection::stream)
+                        .collect(Collectors.toList()).stream()
+                        .map(Map::values)
+                        .flatMap(Collection::stream)
+                        .distinct()
+                        .collect(Collectors.toList());
+
+        // JSON output
+        // one top-level field per record identifier, followed by the combined "issns" list
+        XContentBuilder builder = contentBuilder().prettyPrint()
+                .startObject();
+        for (Map.Entry>>> entry : result.entrySet()) {
+            builder.field(entry.getKey(), entry.getValue());
+        }
+        builder.field("issns", issns);
+        builder.endObject();
+
+        // the result is only logged, not verified
+        logger.log(Level.INFO, builder.string());
+    }
+
+    /**
+     * Decides whether a subfield is collected by {@code findISSNs}.
+     *
+     * @param field the field whose tag selects the rule
+     * @param subfield the subfield whose id is checked
+     * @return {@code true} for subfield "a" or "f" of tag "011", or for subfield "x" of
+     *         tags "421", "451", "452" and "488"; {@code false} for everything else
+     */
+    private static boolean matchISSNField(MarcField field, MarcField.Subfield subfield) {
+        switch (field.getTag()) {
+            case "011": {
+                return "a".equals(subfield.getId()) || "f".equals(subfield.getId());
+            }
+            // the following tags share one rule: only subfield "x" is accepted
+            case "421":
+            case "451":
+            case "452":
+            case "488":
+                return "x".equals(subfield.getId());
+        }
+        // any other tag is ignored
+        return false;
+    }
+}