Fix for #2: convert space subfield codes into underscores, add Bundeskunsthalle MARC example, update to Gradle 4.3.1

This commit is contained in:
Jörg Prante 2017-12-07 18:24:40 +01:00
parent 195abf74de
commit 58c2a49293
8 changed files with 119 additions and 10 deletions

View file

@ -1,7 +1,7 @@
plugins {
id "org.sonarqube" version "2.5"
id "org.sonarqube" version "2.6.1"
id "io.codearte.nexus-staging" version "0.11.0"
id "org.xbib.gradle.plugin.asciidoctor" version "1.5.4.1.0"
id "io.codearte.nexus-staging" version "0.7.0"
}
printf "Host: %s\nOS: %s %s %s\nJVM: %s %s %s %s\nGroovy: %s\nGradle: %s\n" +
@ -24,8 +24,8 @@ apply plugin: 'findbugs'
apply plugin: 'pmd'
apply plugin: 'checkstyle'
apply plugin: "jacoco"
apply plugin: 'org.xbib.gradle.plugin.asciidoctor'
apply plugin: "io.codearte.nexus-staging"
apply plugin: 'org.xbib.gradle.plugin.asciidoctor'
repositories {
mavenCentral()

View file

@ -1,6 +1,6 @@
group = org.xbib
name = marc
version = 1.0.16
version = 1.0.17
xbib-content.version = 1.0.7
xbib-bibliographic-character-sets.version = 1.0.0

Binary file not shown.

View file

@ -1,6 +1,6 @@
#Mon Oct 16 11:03:23 CEST 2017
#Tue Dec 05 21:14:54 CET 2017
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-4.2-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-4.3.1-all.zip

View file

@ -36,7 +36,9 @@ import java.util.stream.Collectors;
public class MarcRecord extends LinkedHashMap<String, Object> {
private static final MarcRecord EMPTY = Marc.builder().buildRecord();
private static final long serialVersionUID = 5305809148724342653L;
private final String format;
private final String type;
@ -183,7 +185,9 @@ public class MarcRecord extends LinkedHashMap<String, Object> {
}
Map<String, Object> subfields = (Map<String, Object>) indicators.get(indicator);
for (MarcField.Subfield subfield : marcField.getSubfields()) {
Object subfieldValue = subfields.get(subfield.getId());
String code = subfield.getId();
code = code.replace(' ', '_');
Object subfieldValue = subfields.get(code);
if (subfieldValue instanceof List) {
List<String> list = (List<String>) subfieldValue;
list.add(subfield.getValue());
@ -191,9 +195,9 @@ public class MarcRecord extends LinkedHashMap<String, Object> {
List<String> list = new LinkedList<>();
list.add((String) subfieldValue);
list.add(subfield.getValue());
subfields.put(subfield.getId(), list);
subfields.put(code, list);
} else {
subfields.put(subfield.getId(), subfield.getValue());
subfields.put(code, subfield.getValue());
}
}
} else {

View file

@ -56,23 +56,41 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
private static final Logger logger = Logger.getLogger(MarcJsonWriter.class.getName());
private static final int DEFAULT_BUFFER_SIZE = 65536;
private static final Pattern quotePattern = Pattern.compile("\"", Pattern.LITERAL);
private static final Pattern backslashPattern = Pattern.compile("\\\\");
private static final String ESCAPE_QUOTE = "\\\"";
private static final String ESCAPE_BACKSLASH = "\\\\";
private final Lock lock;
private final StringBuilder sb;
private Writer writer;
private Marc.Builder builder;
private boolean fatalErrors;
private Style style;
private Exception exception;
private String fileNamePattern;
private AtomicInteger fileNameCounter;
private int splitlimit;
private int bufferSize;
private boolean compress;
private String index;
private String indexType;
/**
* Flag for indicating if writer is at top of file.

View file

@ -260,7 +260,7 @@ public class MarcJsonWriterTest {
@Test
public void elasticsearchBulkFormatCompressed() throws Exception {
String s = "IRMARC8.bin";
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
InputStream in = getClass().getResource("/org/xbib/marc/" + s).openStream();
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
// split at 3, Elasticsearch bulk format, buffer size 65536, compress = true
@ -295,4 +295,21 @@ public class MarcJsonWriterTest {
}
}
/**
 * Reads the {@code bundeskunsthalle.xml} MarcXchange example from the test
 * resources and feeds it through a {@link MarcJsonWriter} configured for the
 * Elasticsearch bulk style, then verifies that the writer did not record an
 * exception.
 *
 * <p>The input stream is declared as a resource of the try statement so it is
 * closed even when constructing the writer or parsing fails. Resources are
 * closed in reverse declaration order, so the writer is closed before the
 * stream it was fed from.</p>
 *
 * @throws Exception if the resource cannot be opened or parsing fails
 */
@Test
public void testBundeskunsthalle() throws Exception {
    String s = "bundeskunsthalle.xml";
    // NOTE(review): the second constructor argument (1) is presumably the split limit
    // for the "build/bk-bulk%d.jsonl" file name pattern -- confirm against MarcJsonWriter.
    try (InputStream in = getClass().getResource("/org/xbib/marc/xml/" + s).openStream();
         MarcJsonWriter writer = new MarcJsonWriter("build/bk-bulk%d.jsonl", 1,
                 MarcJsonWriter.Style.ELASTICSEARCH_BULK)
                 .setIndex("testindex", "testtype")) {
        Marc.builder()
                .setFormat(MarcXchangeConstants.MARCXCHANGE_FORMAT)
                .setType(MarcXchangeConstants.BIBLIOGRAPHIC_TYPE)
                .setInputStream(in)
                .setMarcListener(writer)
                .build()
                .xmlReader().parse();
        // The writer stores failures instead of throwing; none must have occurred.
        assertNull(writer.getException());
    }
}
}

View file

@ -0,0 +1,70 @@
<record xmlns="http://www.loc.gov/MARC21/slim" id="_048861" type="Bibliographic" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
<leader>00000nam a2200024ui 4500</leader>
<controlfield tag="001">048861</controlfield>
<controlfield tag="003">DE-Bo412</controlfield>
<controlfield tag="005">20020528155543.0</controlfield>
<controlfield tag="008">020528s1991 xx |||| |00||||fre|d</controlfield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">2733501968</subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">DE-Bo412</subfield>
<subfield code="b">ger</subfield>
<subfield code="c">DE-Bo412</subfield>
<subfield code="e">rakwb</subfield>
</datafield>
<datafield tag="041" ind1=" " ind2=" ">
<subfield code="a">fre</subfield>
</datafield>
<datafield tag="100" ind1="1" ind2=" ">
<subfield code="a">Grinfelder, Marie-Hélène.</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="4">
<subfield code="a">Les années Supports Surfaces :</subfield>
<subfield code="b">1965-1990 /</subfield>
<subfield code="c">Marie-Hélène Grinfelder</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="a">Paris :</subfield>
<subfield code="b">Herscher,</subfield>
<subfield code="c">1991</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">431 S.:</subfield>
<subfield code="b">Ill.</subfield>
</datafield>
<datafield tag="600" ind1="0" ind2="7">
<subfield code="a">Supports.</subfield>
</datafield>
<datafield tag="600" ind1="0" ind2="7">
<subfield code="a">.</subfield>
<subfield code=" ">Surfaces &lt;Künstlergemeinschaft&gt;</subfield>
</datafield>
<datafield tag="648" ind1=" " ind2="7">
<subfield code="a">1965-1990.</subfield>
<subfield code="2">local</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="7">
<subfield code="a">Künstlervereinigung.</subfield>
<subfield code="0">(DE-Bo412)ss4165895</subfield>
<subfield code="0">(DE-588)4165895-4</subfield>
<subfield code="0">(DE-588c)4165895-4</subfield>
<subfield code="0">(uri)http://d-nb.info/gnd/4165895-4</subfield>
<subfield code="2">gnd</subfield>
</datafield>
<datafield tag="651" ind1=" " ind2="7">
<subfield code="a">Frankreich.</subfield>
<subfield code="2">local</subfield>
</datafield>
<datafield tag="850" ind1=" " ind2=" ">
<subfield code="a">DE-Bo412</subfield>
</datafield>
<datafield tag="852" ind1="4" ind2=" ">
<subfield code="z">Signatur</subfield>
<subfield code="j">Kc 200 Suppor D/1</subfield>
</datafield>
<datafield tag="852" ind1="8" ind2=" ">
<subfield code="z">Inventarnummer</subfield>
<subfield code="p">95-247</subfield>
</datafield>
</record>