From 00378575917ccaaf769bb095d490efeeab3ff862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Prante?= Date: Wed, 2 Nov 2022 14:29:58 +0100 Subject: [PATCH] add plain map join in parsing from maps, add UTF-8 encoding to compiler and javadoc --- gradle.properties | 2 +- gradle/compile/java.gradle | 6 +- src/main/java/org/xbib/marc/MarcRecord.java | 56 ++++++++++++++++++- .../java/org/xbib/marc/MarcRecordTest.java | 47 +++++++++++++++- 4 files changed, 104 insertions(+), 7 deletions(-) diff --git a/gradle.properties b/gradle.properties index b0d5c27..0231c7e 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ group = org.xbib name = marc -version = 2.9.1 +version = 2.9.2 org.gradle.warning.mode = ALL diff --git a/gradle/compile/java.gradle b/gradle/compile/java.gradle index 7c5f2bc..d66bb9c 100644 --- a/gradle/compile/java.gradle +++ b/gradle/compile/java.gradle @@ -37,8 +37,10 @@ artifacts { tasks.withType(JavaCompile) { options.compilerArgs << '-Xlint:all' + options.encoding = 'UTF-8' } -javadoc { +tasks.withType(Javadoc) { options.addStringOption('Xdoclint:none', '-quiet') -} + options.encoding = 'UTF-8' +} \ No newline at end of file diff --git a/src/main/java/org/xbib/marc/MarcRecord.java b/src/main/java/org/xbib/marc/MarcRecord.java index 90123cc..75b4b97 100644 --- a/src/main/java/org/xbib/marc/MarcRecord.java +++ b/src/main/java/org/xbib/marc/MarcRecord.java @@ -30,6 +30,8 @@ import java.util.Set; import java.util.TreeMap; import java.util.function.BiConsumer; import java.util.function.Predicate; +import java.util.logging.Level; +import java.util.logging.Logger; import java.util.regex.Pattern; /** @@ -484,19 +486,27 @@ public class MarcRecord implements Map { if (!prefix.isEmpty()) { key.addLast(prefix); } - List> list = new LinkedList<>(); + LinkedList> list = new LinkedList<>(); source.forEach((k, v) -> { if (v instanceof Map) { parseMap((Map) v, k, key, consumer); } else if (v instanceof Collection) { Collection collection = (Collection) v; + // join into a single map if we have a collection of plain maps + Map map = new LinkedHashMap<>(); for (Object object : collection) { if (object instanceof Map) { - parseMap((Map) object, k, key, consumer); + Map m = (Map) object; + if (!join(map, m)) { + parseMap(m, k, key, consumer); + } } else { list.add(Map.entry(k, object)); } } + if (!map.isEmpty()) { + parseMap(map, k, key, consumer); + } } else { list.add(Map.entry(k, v)); } @@ -508,4 +518,46 @@ public class MarcRecord implements Map { key.removeLast(); } } + + @SuppressWarnings("unchecked") + private static boolean join(Map map1, Map map2) { + if (isPlainMap(map2)) { + String key2 = map2.keySet().iterator().next(); + Object value2 = map2.values().iterator().next(); + // collapse values into a single key + if (map1.containsKey(key2)) { + Object value1 = map1.get(key2); + Collection collection; + if (value1 instanceof Collection) { + collection = (Collection) value1; + collection.add(value2); + } else { + collection = new LinkedList<>(); + collection.add(value1); + collection.add(value2); + } + map1.put(key2, collection); + } else { + map1.put(key2, value2); + } + return true; + } else { + return false; + } + } + + /** + * A "plain" map is a map with exactly one element where the element value is not a map or a collection. + * This technique is used in Elasticsearch for repeating values with (possibly) the same key. + * @param map the map to be tested + * @return true if map is a plain map + */ + private static boolean isPlainMap(Map map) { + if (map.size() == 1) { + Object object = map.values().iterator().next(); + return !(object instanceof Map) && !(object instanceof Collection); + } else { + return false; + } + } } diff --git a/src/test/java/org/xbib/marc/MarcRecordTest.java b/src/test/java/org/xbib/marc/MarcRecordTest.java index a452036..521d5a1 100644 --- a/src/test/java/org/xbib/marc/MarcRecordTest.java +++ b/src/test/java/org/xbib/marc/MarcRecordTest.java @@ -38,8 +38,6 @@ import java.util.Map; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import java.util.logging.Level; -import java.util.logging.Logger; import java.util.regex.Pattern; public class MarcRecordTest { @@ -222,6 +220,7 @@ public class MarcRecordTest { @Test public void testMarcRecordFromMapNested() { + // test if we can have more than one map in a list Map map = Map.of("001", "123", "100", Map.of("_", Map.of("a", "Hello World")), "016", Map.of("7_", List.of(Map.of("2", "DE-101", "a", "010000151"), Map.of("2", "DE-600", "a", "23-1")))); @@ -243,6 +242,50 @@ public class MarcRecordTest { assertTrue(match.get()); } + @Test + public void testMarcRecordFromMapsWithJoinedPlainMaps() { + // test if we can collapse "plain" subfield maps into a common MARC field + // 016=[{7_=[{2=DE-101}, {a=010000151}]}, {7_=[{2=DE-600}, {a=23-1}]}] + Map f1 = Map.of("7_", List.of(Map.of("2", "DE-101"), Map.of("a", "010000151"))); + Map f2 = Map.of("7_", List.of(Map.of("2", "DE-600"), Map.of("a", "23-1"))); + Map map = Map.of("016", List.of(f1, f2)); + MarcRecord marcRecord = MarcRecord.from(map); + List list = new LinkedList<>(); + marcRecord.all(f -> "016".equals(f.getTag()), list::add); + assertEquals(2, list.size()); + AtomicBoolean match = new AtomicBoolean(); + marcRecord.all(f -> "016".equals(f.getTag()) && "7 ".equals(f.getIndicator()), f -> { + if ("DE-600".equals(f.getFirstSubfieldValue("2"))) { + match.set("23-1".equals(f.getFirstSubfieldValue("a"))); + } + }); + assertTrue(match.get()); + } + + @Test + public void testMarcRecordFromMapsWithSameSubfieldId() { + // 016=[{7_=[{a=foo}, {a=bar}}] + Map f1 = Map.of("7_", List.of(Map.of("a", "foo"), Map.of("a", "bar"))); + Map map = Map.of("016", List.of(f1)); + MarcRecord marcRecord = MarcRecord.from(map); + // we must have a single 016 field + List list = new LinkedList<>(); + marcRecord.all(f -> "016".equals(f.getTag()), list::add); + assertEquals(1, list.size()); + // we count for occurences of "foo" and "bar", both must exist + AtomicInteger count = new AtomicInteger(); + marcRecord.all(f -> "016".equals(f.getTag()) && "7 ".equals(f.getIndicator()), f -> + f.getSubfield("a").forEach(sf -> { + if ("foo".equals(sf.getValue())) { + count.incrementAndGet(); + } + if ("bar".equals(sf.getValue())) { + count.incrementAndGet(); + } + })); + assertEquals(2, count.get()); + } + @Test public void testMarcRecordFromMapAsMap() throws IOException { Map map = new TreeMap<>(Map.of("001", "123",