fix for #2, convert space subfield codes into underscore, add Bundeskunsthalle MARC example, update to Gradle 4.3.1
This commit is contained in:
parent
195abf74de
commit
58c2a49293
8 changed files with 119 additions and 10 deletions
|
@ -1,7 +1,7 @@
|
||||||
plugins {
|
plugins {
|
||||||
id "org.sonarqube" version "2.5"
|
id "org.sonarqube" version "2.6.1"
|
||||||
|
id "io.codearte.nexus-staging" version "0.11.0"
|
||||||
id "org.xbib.gradle.plugin.asciidoctor" version "1.5.4.1.0"
|
id "org.xbib.gradle.plugin.asciidoctor" version "1.5.4.1.0"
|
||||||
id "io.codearte.nexus-staging" version "0.7.0"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printf "Host: %s\nOS: %s %s %s\nJVM: %s %s %s %s\nGroovy: %s\nGradle: %s\n" +
|
printf "Host: %s\nOS: %s %s %s\nJVM: %s %s %s %s\nGroovy: %s\nGradle: %s\n" +
|
||||||
|
@ -24,8 +24,8 @@ apply plugin: 'findbugs'
|
||||||
apply plugin: 'pmd'
|
apply plugin: 'pmd'
|
||||||
apply plugin: 'checkstyle'
|
apply plugin: 'checkstyle'
|
||||||
apply plugin: "jacoco"
|
apply plugin: "jacoco"
|
||||||
apply plugin: 'org.xbib.gradle.plugin.asciidoctor'
|
|
||||||
apply plugin: "io.codearte.nexus-staging"
|
apply plugin: "io.codearte.nexus-staging"
|
||||||
|
apply plugin: 'org.xbib.gradle.plugin.asciidoctor'
|
||||||
|
|
||||||
repositories {
|
repositories {
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
group = org.xbib
|
group = org.xbib
|
||||||
name = marc
|
name = marc
|
||||||
version = 1.0.16
|
version = 1.0.17
|
||||||
|
|
||||||
xbib-content.version = 1.0.7
|
xbib-content.version = 1.0.7
|
||||||
xbib-bibliographic-character-sets.version = 1.0.0
|
xbib-bibliographic-character-sets.version = 1.0.0
|
||||||
|
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
4
gradle/wrapper/gradle-wrapper.properties
vendored
4
gradle/wrapper/gradle-wrapper.properties
vendored
|
@ -1,6 +1,6 @@
|
||||||
#Mon Oct 16 11:03:23 CEST 2017
|
#Tue Dec 05 21:14:54 CET 2017
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
zipStorePath=wrapper/dists
|
zipStorePath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-4.2-all.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-4.3.1-all.zip
|
||||||
|
|
|
@ -36,7 +36,9 @@ import java.util.stream.Collectors;
|
||||||
public class MarcRecord extends LinkedHashMap<String, Object> {
|
public class MarcRecord extends LinkedHashMap<String, Object> {
|
||||||
|
|
||||||
private static final MarcRecord EMPTY = Marc.builder().buildRecord();
|
private static final MarcRecord EMPTY = Marc.builder().buildRecord();
|
||||||
|
|
||||||
private static final long serialVersionUID = 5305809148724342653L;
|
private static final long serialVersionUID = 5305809148724342653L;
|
||||||
|
|
||||||
private final String format;
|
private final String format;
|
||||||
|
|
||||||
private final String type;
|
private final String type;
|
||||||
|
@ -183,7 +185,9 @@ public class MarcRecord extends LinkedHashMap<String, Object> {
|
||||||
}
|
}
|
||||||
Map<String, Object> subfields = (Map<String, Object>) indicators.get(indicator);
|
Map<String, Object> subfields = (Map<String, Object>) indicators.get(indicator);
|
||||||
for (MarcField.Subfield subfield : marcField.getSubfields()) {
|
for (MarcField.Subfield subfield : marcField.getSubfields()) {
|
||||||
Object subfieldValue = subfields.get(subfield.getId());
|
String code = subfield.getId();
|
||||||
|
code = code.replace(' ', '_');
|
||||||
|
Object subfieldValue = subfields.get(code);
|
||||||
if (subfieldValue instanceof List) {
|
if (subfieldValue instanceof List) {
|
||||||
List<String> list = (List<String>) subfieldValue;
|
List<String> list = (List<String>) subfieldValue;
|
||||||
list.add(subfield.getValue());
|
list.add(subfield.getValue());
|
||||||
|
@ -191,9 +195,9 @@ public class MarcRecord extends LinkedHashMap<String, Object> {
|
||||||
List<String> list = new LinkedList<>();
|
List<String> list = new LinkedList<>();
|
||||||
list.add((String) subfieldValue);
|
list.add((String) subfieldValue);
|
||||||
list.add(subfield.getValue());
|
list.add(subfield.getValue());
|
||||||
subfields.put(subfield.getId(), list);
|
subfields.put(code, list);
|
||||||
} else {
|
} else {
|
||||||
subfields.put(subfield.getId(), subfield.getValue());
|
subfields.put(code, subfield.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -56,23 +56,41 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo
|
||||||
private static final Logger logger = Logger.getLogger(MarcJsonWriter.class.getName());
|
private static final Logger logger = Logger.getLogger(MarcJsonWriter.class.getName());
|
||||||
|
|
||||||
private static final int DEFAULT_BUFFER_SIZE = 65536;
|
private static final int DEFAULT_BUFFER_SIZE = 65536;
|
||||||
|
|
||||||
private static final Pattern quotePattern = Pattern.compile("\"", Pattern.LITERAL);
|
private static final Pattern quotePattern = Pattern.compile("\"", Pattern.LITERAL);
|
||||||
|
|
||||||
private static final Pattern backslashPattern = Pattern.compile("\\\\");
|
private static final Pattern backslashPattern = Pattern.compile("\\\\");
|
||||||
|
|
||||||
private static final String ESCAPE_QUOTE = "\\\"";
|
private static final String ESCAPE_QUOTE = "\\\"";
|
||||||
|
|
||||||
private static final String ESCAPE_BACKSLASH = "\\\\";
|
private static final String ESCAPE_BACKSLASH = "\\\\";
|
||||||
|
|
||||||
private final Lock lock;
|
private final Lock lock;
|
||||||
|
|
||||||
private final StringBuilder sb;
|
private final StringBuilder sb;
|
||||||
|
|
||||||
private Writer writer;
|
private Writer writer;
|
||||||
|
|
||||||
private Marc.Builder builder;
|
private Marc.Builder builder;
|
||||||
|
|
||||||
private boolean fatalErrors;
|
private boolean fatalErrors;
|
||||||
|
|
||||||
private Style style;
|
private Style style;
|
||||||
|
|
||||||
private Exception exception;
|
private Exception exception;
|
||||||
|
|
||||||
private String fileNamePattern;
|
private String fileNamePattern;
|
||||||
|
|
||||||
private AtomicInteger fileNameCounter;
|
private AtomicInteger fileNameCounter;
|
||||||
|
|
||||||
private int splitlimit;
|
private int splitlimit;
|
||||||
|
|
||||||
private int bufferSize;
|
private int bufferSize;
|
||||||
|
|
||||||
private boolean compress;
|
private boolean compress;
|
||||||
|
|
||||||
private String index;
|
private String index;
|
||||||
|
|
||||||
private String indexType;
|
private String indexType;
|
||||||
/**
|
/**
|
||||||
* Flag for indicating if writer is at top of file.
|
* Flag for indicating if writer is at top of file.
|
||||||
|
|
|
@ -260,7 +260,7 @@ public class MarcJsonWriterTest {
|
||||||
@Test
|
@Test
|
||||||
public void elasticsearchBulkFormatCompressed() throws Exception {
|
public void elasticsearchBulkFormatCompressed() throws Exception {
|
||||||
String s = "IRMARC8.bin";
|
String s = "IRMARC8.bin";
|
||||||
InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream();
|
InputStream in = getClass().getResource("/org/xbib/marc/" + s).openStream();
|
||||||
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
MarcValueTransformers marcValueTransformers = new MarcValueTransformers();
|
||||||
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC));
|
||||||
// split at 3, Elasticsearch bulk format, buffer size 65536, compress = true
|
// split at 3, Elasticsearch bulk format, buffer size 65536, compress = true
|
||||||
|
@ -295,4 +295,21 @@ public class MarcJsonWriterTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBundeskunsthalle() throws Exception {
|
||||||
|
String s = "bundeskunsthalle.xml";
|
||||||
|
InputStream in = getClass().getResource("/org/xbib/marc/xml/" + s).openStream();
|
||||||
|
try (MarcJsonWriter writer = new MarcJsonWriter("build/bk-bulk%d.jsonl", 1,
|
||||||
|
MarcJsonWriter.Style.ELASTICSEARCH_BULK)
|
||||||
|
.setIndex("testindex", "testtype")) {
|
||||||
|
Marc.builder()
|
||||||
|
.setFormat(MarcXchangeConstants.MARCXCHANGE_FORMAT)
|
||||||
|
.setType(MarcXchangeConstants.BIBLIOGRAPHIC_TYPE)
|
||||||
|
.setInputStream(in)
|
||||||
|
.setMarcListener(writer)
|
||||||
|
.build()
|
||||||
|
.xmlReader().parse();
|
||||||
|
assertNull(writer.getException());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
70
src/test/resources/org/xbib/marc/xml/bundeskunsthalle.xml
Normal file
70
src/test/resources/org/xbib/marc/xml/bundeskunsthalle.xml
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
<record xmlns="http://www.loc.gov/MARC21/slim" id="_048861" type="Bibliographic" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
|
||||||
|
<leader>00000nam a2200024ui 4500</leader>
|
||||||
|
<controlfield tag="001">048861</controlfield>
|
||||||
|
<controlfield tag="003">DE-Bo412</controlfield>
|
||||||
|
<controlfield tag="005">20020528155543.0</controlfield>
|
||||||
|
<controlfield tag="008">020528s1991 xx |||| |00||||fre|d</controlfield>
|
||||||
|
<datafield tag="020" ind1=" " ind2=" ">
|
||||||
|
<subfield code="a">2733501968</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="040" ind1=" " ind2=" ">
|
||||||
|
<subfield code="a">DE-Bo412</subfield>
|
||||||
|
<subfield code="b">ger</subfield>
|
||||||
|
<subfield code="c">DE-Bo412</subfield>
|
||||||
|
<subfield code="e">rakwb</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="041" ind1=" " ind2=" ">
|
||||||
|
<subfield code="a">fre</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="100" ind1="1" ind2=" ">
|
||||||
|
<subfield code="a">Grinfelder, Marie-Hélène.</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="245" ind1="1" ind2="4">
|
||||||
|
<subfield code="a">Les années Supports Surfaces :</subfield>
|
||||||
|
<subfield code="b">1965-1990 /</subfield>
|
||||||
|
<subfield code="c">Marie-Hélène Grinfelder</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="260" ind1=" " ind2=" ">
|
||||||
|
<subfield code="a">Paris :</subfield>
|
||||||
|
<subfield code="b">Herscher,</subfield>
|
||||||
|
<subfield code="c">1991</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="300" ind1=" " ind2=" ">
|
||||||
|
<subfield code="a">431 S.:</subfield>
|
||||||
|
<subfield code="b">Ill.</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="600" ind1="0" ind2="7">
|
||||||
|
<subfield code="a">Supports.</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="600" ind1="0" ind2="7">
|
||||||
|
<subfield code="a">.</subfield>
|
||||||
|
<subfield code=" ">Surfaces &lt;Künstlergemeinschaft&gt;</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="648" ind1=" " ind2="7">
|
||||||
|
<subfield code="a">1965-1990.</subfield>
|
||||||
|
<subfield code="2">local</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="650" ind1=" " ind2="7">
|
||||||
|
<subfield code="a">Künstlervereinigung.</subfield>
|
||||||
|
<subfield code="0">(DE-Bo412)ss4165895</subfield>
|
||||||
|
<subfield code="0">(DE-588)4165895-4</subfield>
|
||||||
|
<subfield code="0">(DE-588c)4165895-4</subfield>
|
||||||
|
<subfield code="0">(uri)http://d-nb.info/gnd/4165895-4</subfield>
|
||||||
|
<subfield code="2">gnd</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="651" ind1=" " ind2="7">
|
||||||
|
<subfield code="a">Frankreich.</subfield>
|
||||||
|
<subfield code="2">local</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="850" ind1=" " ind2=" ">
|
||||||
|
<subfield code="a">DE-Bo412</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="852" ind1="4" ind2=" ">
|
||||||
|
<subfield code="z">Signatur</subfield>
|
||||||
|
<subfield code="j">Kc 200 Suppor D/1</subfield>
|
||||||
|
</datafield>
|
||||||
|
<datafield tag="852" ind1="8" ind2=" ">
|
||||||
|
<subfield code="z">Inventarnummer</subfield>
|
||||||
|
<subfield code="p">95-247</subfield>
|
||||||
|
</datafield>
|
||||||
|
</record>
|
Loading…
Reference in a new issue