add plain map join in parsing from maps, add UTF-8 encoding to compiler and javadoc

This commit is contained in:
Jörg Prante 2022-11-02 14:29:58 +01:00
parent 3bf648324c
commit 0037857591
4 changed files with 104 additions and 7 deletions

View file

@ -1,5 +1,5 @@
group = org.xbib group = org.xbib
name = marc name = marc
version = 2.9.1 version = 2.9.2
org.gradle.warning.mode = ALL org.gradle.warning.mode = ALL

View file

@ -37,8 +37,10 @@ artifacts {
tasks.withType(JavaCompile) { tasks.withType(JavaCompile) {
options.compilerArgs << '-Xlint:all' options.compilerArgs << '-Xlint:all'
options.encoding = 'UTF-8'
} }
javadoc { tasks.withType(Javadoc) {
options.addStringOption('Xdoclint:none', '-quiet') options.addStringOption('Xdoclint:none', '-quiet')
} options.encoding = 'UTF-8'
}

View file

@ -30,6 +30,8 @@ import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.function.BiConsumer; import java.util.function.BiConsumer;
import java.util.function.Predicate; import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
@ -484,19 +486,27 @@ public class MarcRecord implements Map<String, Object> {
if (!prefix.isEmpty()) { if (!prefix.isEmpty()) {
key.addLast(prefix); key.addLast(prefix);
} }
List<Map.Entry<String, Object>> list = new LinkedList<>(); LinkedList<Map.Entry<String, Object>> list = new LinkedList<>();
source.forEach((k, v) -> { source.forEach((k, v) -> {
if (v instanceof Map) { if (v instanceof Map) {
parseMap((Map<String, Object>) v, k, key, consumer); parseMap((Map<String, Object>) v, k, key, consumer);
} else if (v instanceof Collection) { } else if (v instanceof Collection) {
Collection<Object> collection = (Collection<Object>) v; Collection<Object> collection = (Collection<Object>) v;
// join into a single map if we have a collection of plain maps
Map<String, Object> map = new LinkedHashMap<>();
for (Object object : collection) { for (Object object : collection) {
if (object instanceof Map) { if (object instanceof Map) {
parseMap((Map<String, Object>) object, k, key, consumer); Map<String, Object> m = (Map<String, Object>) object;
if (!join(map, m)) {
parseMap(m, k, key, consumer);
}
} else { } else {
list.add(Map.entry(k, object)); list.add(Map.entry(k, object));
} }
} }
if (!map.isEmpty()) {
parseMap(map, k, key, consumer);
}
} else { } else {
list.add(Map.entry(k, v)); list.add(Map.entry(k, v));
} }
@ -508,4 +518,46 @@ public class MarcRecord implements Map<String, Object> {
key.removeLast(); key.removeLast();
} }
} }
/**
 * Merges the single entry of {@code map2} into {@code map1}, provided {@code map2} is a
 * "plain" map as decided by {@code isPlainMap}.
 * If {@code map1} does not yet contain the key, the value is stored as is. If the key is
 * already present, the values are collected under that key: an existing collection value
 * is appended to in place, any other existing value is moved into a new list together
 * with the new value.
 *
 * @param map1 the target map, modified in place
 * @param map2 the candidate map whose single entry is to be merged
 * @return true if {@code map2} was plain and has been merged, false if {@code map1} was left untouched
 */
@SuppressWarnings("unchecked")
private static boolean join(Map<String, Object> map1, Map<String, Object> map2) {
    if (!isPlainMap(map2)) {
        return false;
    }
    // a plain map has exactly one entry, so the first entry is the only one
    Map.Entry<String, Object> single = map2.entrySet().iterator().next();
    String name = single.getKey();
    Object incoming = single.getValue();
    if (!map1.containsKey(name)) {
        map1.put(name, incoming);
        return true;
    }
    // key is already present: collapse old and new value under the same key
    Object existing = map1.get(name);
    Collection<Object> merged;
    if (existing instanceof Collection) {
        merged = (Collection<Object>) existing;
    } else {
        merged = new LinkedList<>();
        merged.add(existing);
    }
    merged.add(incoming);
    map1.put(name, merged);
    return true;
}
/**
 * Tests whether a map is "plain": it holds exactly one entry, and the value of that entry
 * is neither a map nor a collection.
 * This technique is used in Elasticsearch for repeating values with (possibly) the same key.
 *
 * @param map the map to be tested
 * @return true if map is a plain map, false otherwise
 */
private static boolean isPlainMap(Map<String, Object> map) {
    if (map.size() != 1) {
        return false;
    }
    Object soleValue = map.values().iterator().next();
    boolean isContainer = soleValue instanceof Map || soleValue instanceof Collection<?>;
    return !isContainer;
}
} }

View file

@ -38,8 +38,6 @@ import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class MarcRecordTest { public class MarcRecordTest {
@ -222,6 +220,7 @@ public class MarcRecordTest {
@Test @Test
public void testMarcRecordFromMapNested() { public void testMarcRecordFromMapNested() {
// test if we can have more than one map in a list
Map<String, Object> map = Map.of("001", "123", Map<String, Object> map = Map.of("001", "123",
"100", Map.of("_", Map.of("a", "Hello World")), "100", Map.of("_", Map.of("a", "Hello World")),
"016", Map.of("7_", List.of(Map.of("2", "DE-101", "a", "010000151"), Map.of("2", "DE-600", "a", "23-1")))); "016", Map.of("7_", List.of(Map.of("2", "DE-101", "a", "010000151"), Map.of("2", "DE-600", "a", "23-1"))));
@ -243,6 +242,50 @@ public class MarcRecordTest {
assertTrue(match.get()); assertTrue(match.get());
} }
@Test
public void testMarcRecordFromMapsWithJoinedPlainMaps() {
    // Each 016 occurrence lists its subfields as separate one-entry ("plain") maps;
    // they are expected to be collapsed into one MARC field per occurrence.
    // Input: 016=[{7_=[{2=DE-101}, {a=010000151}]}, {7_=[{2=DE-600}, {a=23-1}]}]
    Map<String, Object> firstOccurrence =
            Map.of("7_", List.of(Map.of("2", "DE-101"), Map.of("a", "010000151")));
    Map<String, Object> secondOccurrence =
            Map.of("7_", List.of(Map.of("2", "DE-600"), Map.of("a", "23-1")));
    Map<String, Object> source = Map.of("016", List.of(firstOccurrence, secondOccurrence));
    MarcRecord parsed = MarcRecord.from(source);
    // both occurrences must show up as fields with tag 016
    List<MarcField> fields016 = new LinkedList<>();
    parsed.all(field -> "016".equals(field.getTag()), fields016::add);
    assertEquals(2, fields016.size());
    // the field with subfield 2=DE-600 must also carry subfield a=23-1
    AtomicBoolean found = new AtomicBoolean();
    parsed.all(field -> "016".equals(field.getTag()) && "7 ".equals(field.getIndicator()), field -> {
        if ("DE-600".equals(field.getFirstSubfieldValue("2"))) {
            found.set("23-1".equals(field.getFirstSubfieldValue("a")));
        }
    });
    assertTrue(found.get());
}
@Test
public void testMarcRecordFromMapsWithSameSubfieldId() {
    // Two plain maps with the same subfield id inside a single 016 occurrence.
    // Input: 016=[{7_=[{a=foo}, {a=bar}]}]
    Map<String, Object> occurrence =
            Map.of("7_", List.of(Map.of("a", "foo"), Map.of("a", "bar")));
    Map<String, Object> source = Map.of("016", List.of(occurrence));
    MarcRecord parsed = MarcRecord.from(source);
    // exactly one 016 field is expected
    List<MarcField> fields016 = new LinkedList<>();
    parsed.all(field -> "016".equals(field.getTag()), fields016::add);
    assertEquals(1, fields016.size());
    // count the occurrences of "foo" and "bar" in subfield a; both must be present
    AtomicInteger hits = new AtomicInteger();
    parsed.all(field -> "016".equals(field.getTag()) && "7 ".equals(field.getIndicator()), field ->
            field.getSubfield("a").forEach(subfield -> {
                if ("foo".equals(subfield.getValue()) || "bar".equals(subfield.getValue())) {
                    hits.incrementAndGet();
                }
            }));
    assertEquals(2, hits.get());
}
@Test @Test
public void testMarcRecordFromMapAsMap() throws IOException { public void testMarcRecordFromMapAsMap() throws IOException {
Map<String, Object> map = new TreeMap<>(Map.of("001", "123", Map<String, Object> map = new TreeMap<>(Map.of("001", "123",