From 58c2a49293af4b91f2ebc00bc964e5c43b344585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=CC=88rg=20Prante?= Date: Thu, 7 Dec 2017 18:24:40 +0100 Subject: [PATCH] fix for #2, convert space subfield codes into underscore, add Bundeskunsthalle MARC example, update to Gradle 4.3.1 --- build.gradle | 6 +- gradle.properties | 2 +- gradle/wrapper/gradle-wrapper.jar | Bin 54708 -> 54731 bytes gradle/wrapper/gradle-wrapper.properties | 4 +- src/main/java/org/xbib/marc/MarcRecord.java | 10 ++- .../org/xbib/marc/json/MarcJsonWriter.java | 18 +++++ .../xbib/marc/json/MarcJsonWriterTest.java | 19 ++++- .../org/xbib/marc/xml/bundeskunsthalle.xml | 70 ++++++++++++++++++ 8 files changed, 119 insertions(+), 10 deletions(-) create mode 100644 src/test/resources/org/xbib/marc/xml/bundeskunsthalle.xml diff --git a/build.gradle b/build.gradle index 03f5bb5..190cc5e 100644 --- a/build.gradle +++ b/build.gradle @@ -1,7 +1,7 @@ plugins { - id "org.sonarqube" version "2.5" + id "org.sonarqube" version "2.6.1" + id "io.codearte.nexus-staging" version "0.11.0" id "org.xbib.gradle.plugin.asciidoctor" version "1.5.4.1.0" - id "io.codearte.nexus-staging" version "0.7.0" } printf "Host: %s\nOS: %s %s %s\nJVM: %s %s %s %s\nGroovy: %s\nGradle: %s\n" + @@ -24,8 +24,8 @@ apply plugin: 'findbugs' apply plugin: 'pmd' apply plugin: 'checkstyle' apply plugin: "jacoco" -apply plugin: 'org.xbib.gradle.plugin.asciidoctor' apply plugin: "io.codearte.nexus-staging" +apply plugin: 'org.xbib.gradle.plugin.asciidoctor' repositories { mavenCentral() diff --git a/gradle.properties b/gradle.properties index beb39f1..6fe171c 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,6 +1,6 @@ group = org.xbib name = marc -version = 1.0.16 +version = 1.0.17 xbib-content.version = 1.0.7 xbib-bibliographic-character-sets.version = 1.0.0 diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 736fb7d3f94c051b359fc7ae7212d351bc094bdd..6b6ea3ab4ff4f69d55c5fd9c0a6ac70f47d41008 100644 GIT binary patch delta 2300 zcmY+Fc|26>AIE2obw~!;#@KU>5uvzZG?uc*AZ5u`S+a$Yq&t$Xh(V8avXqiEh@m2D zF*L?9cA~m&DO&L~k%g&ikUuUWjOh+9=R+V~iXafb0Nq@9klw!u2t;7WBwVtm(pJ;q z>&oo1$qLHN!?IJahQF*YNyN>0vg{OtDw1sq=RT#T4199@l+v(Hsd_~B&$m~XC^n&) zG(TJX?eq)cw{Oe8BDO!D#@+R^Wym=-_}$|1)O~VRwnd}d{d1#2CpNq*LgF~HXBr>Q zxN^jM?}5GAX#?{b%H=1o=aBPzP7B2QclOz*E!8GbY*c;NEJ;t?q}TVAxR{Xn^9cd;P`UG8-=j*{(Z)XRTh1?y zo>LL6{^*`j5MQ|mh-BPYzSJ@;`u&}SL*%#69icKyWzWJ2 zhwvN@Z}Giq+sJO*cAVj);A8s7!j|gS_SsCTWk8q5p;6%`-D^s}6g_efpM7rm09Q)N zvi_fMdfxf!!OMr^d_By9x_VLWdL`J~+XjSB+qQv@>Z))D&Rylz8okh_V|QfNy^&ZF zWrXg(Blu>EZ>?~%cRIPQ?RN!jK9l7KQ;@W2J;5Vdr#0BqJtfFHNBw|cgZ3`;6?TcG z*Tvd8zSij8mz1QdM-vTsXqBv?j$Jl2uJUgkYkKWJ&K~V8?)#oz+9-Emsk$P3QmQe7diZ2p4U3=a?y3tIDqL8|SQhs)FMdhK;0^887xpWEc>mZ)2z{*tEZFmhsgc_|U?Z1`Xi#@tF zb6&7?dXD(uX6qY2ZR1q`0=EGtOgY1@!FuShjPAFVDHgD4JH;oEnt3xq&>~SatjAA< zXzu>pV+^~z6rVe+8($faJL~YcDx0lf%r9GU!MUjiqKfWML_Ca%%BAsM zy)jG|&unceqr@ksxKL;%w+d5SV~kxo@%rluGvGsWhDlkFv?MvCzipOU5Vd>6p?o8aWfUUv#jbNA>$BhFpc?&cfZ^n%+)d zQzSBSb*R|9Cm{x_!AD*&@=g$ur%+yz#e(;WP;<%mo(4P(akkewBFuQaKF*YE?7Ykh zIA+&|5mzUKqE^Rip71$?mBl1lPR*|27wsfF9$rw=kEy>G6YC&0zJnJ*vJ((2AWOMO za&LGEKBMs-4*0~2 zfJUy}dInzPa7GS(SG_?D6m%AjE}c~aVZ&%}gB$TD!;wxh7W8u?F*XLmrNa@4y%)k~ z0^Lj^OkS}uNO^!9Itpp#azhP}HDnCo@?q(gzX|TaOWin6VJZxbsZ!i-c<5yjTvoJL zj9U*65lUf;9kGVu%fQ((9H5NgA+AbgL88bt(h5F|^$OKXvZRWd1Y3FW}smCjWu1AK_n(F!1{T delta 2205 zcmY+F3p7;w9>-_4I|mb)h!|!RVHzZ_2-y^39~* zy|3T(W7S)wh`kcfP2LklieR^tSRd=Z5}Fk}%3ZLsYu4t$N(rdPU4-Lr8# zRq15M+S}mm;8j5J6O6BSe%;XV>CT($a!sB+l_LqFKD$FA9g*GRbO)dCnMRi?kUh$r zWX{DV-`~BgPX8`aJ#~5MsG8L-tuV{nU-u;jTRnZ(EfaC>Q`PYitvHDl>DfO0rQ=CoCD$6?`+I1tz%Q0OOfw6PE8cOgzud7KBBA9 zrgYlTOseg$f8lt|f}XSatkmPioIlF%D9PwwHv2SN-P}}JeD2sBUwdvaM*iWZIesaS zKOjV0Fm$*(iLz70Un#(-G^2|NDsHY`MM@^A=SA71Nh`^B;PcTf%DSb=UG-NNy+NTDm28dU;B?k=hv_{J zr1YYk92mYCW!2zf?|1r4Cweu@!TvB2z3`HGVI7SR-p5{FvmqsixrL`yKi5nf**HbX zdrxh#nk;{UUJbcV&v0A3nIS!5 zGVF3HseP<^(?r00=5=&-=A%a*r<5l%SYeh&bfs|KE?QiAB|CT`GIDo}hn;aw2D^rR zZqj!pu{L%-s=A=3nOd93-VYV@Z=&p{j|*PK%Y~*bRDR#w z#>>GA8cPnI&wgWl-H_bzTaJwW`GV*FVtln`2ugh8uB>ID9d`Yrb=*}=YdP$aVy!p# z*q%Dy9i^$5Cxo%bCGZ@P{qzwmKcl`?S#RrWq zvjKcyw;%%+Bq z$CsxlI!Wtvw6-iq$%jUuo)D&Lt6gimS80XgroIww z-wf{7CSG!thY;>Al2VJJFP{{|%l9Vn36TA2fYzt`|1}*{^?4$TfvC?A*#-3a&5=T& zPEi7>{X|F&3AR!cfg2JF{J9t)=*J>`0CK<`vXq2_QE4dqNdLf%pkxpOzHGtB zh+F+Qgj@bsyIolcTzP;2Lgk+eW|ZLq8I`T#kO*a|!`QBl1vATfxA3^ZWNWY8AE z>BG`pUryYL2ubdQ(bZHEyc#ruR84=NEU@4Zp_7g<);r3;ZHqWqBnC8as1SBP9Ow`2 zhU~pync{^5%poGA?I%Vx5bUP_zK9W`h>-}~hKZ0I73fp70Be|ti~+;L)=()Gv{Q|N z^#~Cn92QFz5OjFE7%`xE1dHSX-bf%676VSl=>BD@{cWrK?{MIIZ!CBbO8|9TBE(1p z$Wl%4jcbTJ2X>7fhEz`g!%+h088w6CPQn>}Y%k<_8kSjOZqU*hSpIv=9#Xgf%R{`q zkYXMzBlFb2M;;N9x(K87LmNn`5SCGeTf_+l3_ip{3YTHaxmX4$jPHV089z`B+!@1w zOXFCGcU5eezz;Z={rv58%HZ16GCUk+mH!-4%f+DvD4W { private static final MarcRecord EMPTY = Marc.builder().buildRecord(); + private static final long serialVersionUID = 5305809148724342653L; + private final String format; private final String type; @@ -183,7 +185,9 @@ public class MarcRecord extends LinkedHashMap { } Map subfields = (Map) indicators.get(indicator); for (MarcField.Subfield subfield : marcField.getSubfields()) { - Object subfieldValue = subfields.get(subfield.getId()); + String code = subfield.getId(); + code = code.replace(' ', '_'); + Object subfieldValue = subfields.get(code); if (subfieldValue instanceof List) { List list = (List) subfieldValue; list.add(subfield.getValue()); @@ -191,9 +195,9 @@ public class MarcRecord extends LinkedHashMap { List list = new LinkedList<>(); list.add((String) subfieldValue); list.add(subfield.getValue()); - subfields.put(subfield.getId(), list); + subfields.put(code, list); } else { - subfields.put(subfield.getId(), subfield.getValue()); + subfields.put(code, subfield.getValue()); } } } else { diff --git a/src/main/java/org/xbib/marc/json/MarcJsonWriter.java b/src/main/java/org/xbib/marc/json/MarcJsonWriter.java index e4a038e..275cd85 100644 --- a/src/main/java/org/xbib/marc/json/MarcJsonWriter.java +++ b/src/main/java/org/xbib/marc/json/MarcJsonWriter.java @@ -56,23 +56,41 @@ public class MarcJsonWriter extends MarcContentHandler implements Flushable, Clo private static final Logger logger = Logger.getLogger(MarcJsonWriter.class.getName()); private static final int DEFAULT_BUFFER_SIZE = 65536; + private static final Pattern quotePattern = Pattern.compile("\"", Pattern.LITERAL); + private static final Pattern backslashPattern = Pattern.compile("\\\\"); + private static final String ESCAPE_QUOTE = "\\\""; + private static final String ESCAPE_BACKSLASH = "\\\\"; + private final Lock lock; + private final StringBuilder sb; + private Writer writer; + private Marc.Builder builder; + private boolean fatalErrors; + private Style style; + private Exception exception; + private String fileNamePattern; + private AtomicInteger fileNameCounter; + private int splitlimit; + private int bufferSize; + private boolean compress; + private String index; + private String indexType; /** * Flag for indicating if writer is at top of file. diff --git a/src/test/java/org/xbib/marc/json/MarcJsonWriterTest.java b/src/test/java/org/xbib/marc/json/MarcJsonWriterTest.java index ac87db5..c369f71 100644 --- a/src/test/java/org/xbib/marc/json/MarcJsonWriterTest.java +++ b/src/test/java/org/xbib/marc/json/MarcJsonWriterTest.java @@ -260,7 +260,7 @@ public class MarcJsonWriterTest { @Test public void elasticsearchBulkFormatCompressed() throws Exception { String s = "IRMARC8.bin"; - InputStream in = getClass().getResource("/org/xbib/marc//" + s).openStream(); + InputStream in = getClass().getResource("/org/xbib/marc/" + s).openStream(); MarcValueTransformers marcValueTransformers = new MarcValueTransformers(); marcValueTransformers.setMarcValueTransformer(value -> Normalizer.normalize(value, Normalizer.Form.NFC)); // split at 3, Elasticsearch bulk format, buffer size 65536, compress = true @@ -295,4 +295,21 @@ public class MarcJsonWriterTest { } } + @Test + public void testBundeskunsthalle() throws Exception { + String s = "bundeskunsthalle.xml"; + InputStream in = getClass().getResource("/org/xbib/marc/xml/" + s).openStream(); + try (MarcJsonWriter writer = new MarcJsonWriter("build/bk-bulk%d.jsonl", 1, + MarcJsonWriter.Style.ELASTICSEARCH_BULK) + .setIndex("testindex", "testtype")) { + Marc.builder() + .setFormat(MarcXchangeConstants.MARCXCHANGE_FORMAT) + .setType(MarcXchangeConstants.BIBLIOGRAPHIC_TYPE) + .setInputStream(in) + .setMarcListener(writer) + .build() + .xmlReader().parse(); + assertNull(writer.getException()); + } + } } diff --git a/src/test/resources/org/xbib/marc/xml/bundeskunsthalle.xml b/src/test/resources/org/xbib/marc/xml/bundeskunsthalle.xml new file mode 100644 index 0000000..673524f --- /dev/null +++ b/src/test/resources/org/xbib/marc/xml/bundeskunsthalle.xml @@ -0,0 +1,70 @@ + + 00000nam a2200024ui 4500 + 048861 + DE-Bo412 + 20020528155543.0 + 020528s1991 xx |||| |00||||fre|d + + 2733501968 + + + DE-Bo412 + ger + DE-Bo412 + rakwb + + + fre + + + Grinfelder, Marie-Hélène. + + + Les années Supports Surfaces : + 1965-1990 / + Marie-Hélène Grinfelder + + + Paris : + Herscher, + 1991 + + + 431 S.: + Ill. + + + Supports. + + + . + Surfaces <Künstlergemeinschaft> + + + 1965-1990. + local + + + Künstlervereinigung. + (DE-Bo412)ss4165895 + (DE-588)4165895-4 + (DE-588c)4165895-4 + (uri)http://d-nb.info/gnd/4165895-4 + gnd + + + Frankreich. + local + + + DE-Bo412 + + + Signatur + Kc 200 Suppor D/1 + + + Inventarnummer + 95-247 + + \ No newline at end of file