From ea79e7afd6c19fb76615712d95bc47524fd2a17c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Prante?= Date: Mon, 17 Oct 2022 10:39:41 +0200 Subject: [PATCH] update to Gradle 7.5.1, update to Java 17, drop xbib net dependency because we rely on JDK HTTP client, add custom URL builder --- build.gradle | 2 +- gradle.properties | 2 + gradle/compile/java.gradle | 8 +- gradle/test/junit5.gradle | 12 +- gradle/wrapper/gradle-wrapper.jar | Bin 59536 -> 60756 bytes gradle/wrapper/gradle-wrapper.properties | 2 +- gradlew | 6 + gradlew.bat | 14 +- oai-client/build.gradle | 4 +- oai-client/src/main/java/module-info.java | 2 +- .../java/org/xbib/oai/client/OAIClient.java | 7 +- .../xbib/oai/client/util/PercentDecoder.java | 196 +++++++ .../xbib/oai/client/util/PercentEncoder.java | 187 +++++++ .../org/xbib/oai/client/util/UrlBuilder.java | 503 ++++++++++++++++++ .../oai/client/util/UrlPercentEncoders.java | 164 ++++++ .../org/xbib/oai/client/DOAJClientTest.java | 8 +- .../oai/client/util/PercentEncoderTest.java | 84 +++ .../xbib/oai/client/util/UrlBuilderTest.java | 425 +++++++++++++++ oai-common/build.gradle | 8 +- settings.gradle | 20 + 20 files changed, 1621 insertions(+), 33 deletions(-) create mode 100755 oai-client/src/main/java/org/xbib/oai/client/util/PercentDecoder.java create mode 100755 oai-client/src/main/java/org/xbib/oai/client/util/PercentEncoder.java create mode 100755 oai-client/src/main/java/org/xbib/oai/client/util/UrlBuilder.java create mode 100755 oai-client/src/main/java/org/xbib/oai/client/util/UrlPercentEncoders.java create mode 100755 oai-client/src/test/java/org/xbib/oai/client/util/PercentEncoderTest.java create mode 100755 oai-client/src/test/java/org/xbib/oai/client/util/UrlBuilderTest.java diff --git a/build.gradle b/build.gradle index 7877e53..99ae975 100644 --- a/build.gradle +++ b/build.gradle @@ -4,7 +4,7 @@ plugins { } wrapper { - gradleVersion = "${project.property('gradle.wrapper.version')}" + gradleVersion = libs.versions.gradle.get() 
distributionType = Wrapper.DistributionType.ALL } diff --git a/gradle.properties b/gradle.properties index d1bfbe8..0b479b5 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,3 +1,5 @@ group = org.xbib name = oai version = 3.0.0 + +gradle.wrapper.version = 7.5.1 diff --git a/gradle/compile/java.gradle b/gradle/compile/java.gradle index b3744c5..9016125 100644 --- a/gradle/compile/java.gradle +++ b/gradle/compile/java.gradle @@ -6,13 +6,13 @@ java { } compileJava { - sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_11 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } compileTestJava { - sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_11 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } jar { diff --git a/gradle/test/junit5.gradle b/gradle/test/junit5.gradle index e1960cf..81b99a6 100644 --- a/gradle/test/junit5.gradle +++ b/gradle/test/junit5.gradle @@ -1,12 +1,8 @@ - -def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.8.2' -def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2' - dependencies { - testImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}" - testImplementation "org.junit.jupiter:junit-jupiter-params:${junitVersion}" - testImplementation "org.hamcrest:hamcrest-library:${hamcrestVersion}" - testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${junitVersion}" + testImplementation libs.junit.jupiter.api + testImplementation libs.junit.jupiter.params + testImplementation libs.hamcrest + testRuntimeOnly libs.junit.jupiter.engine } test { diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 7454180f2ae8848c63b8b4dea2cb829da983f2fa..249e5832f090a2944b7473328c07c9755baa3196 100644 GIT binary patch delta 10158 
zcmaKSbyOWsmn~e}-QC?axCPf>!2<-jxI0|j{UX8L-QC?axDz};a7}ppGBe+Nv*x{5 zy?WI?=j^WT(_Md5*V*xNP>X9&wM>xUvNiMuKDK=Xg!N%oM>Yru2rh7#yD-sW0Ov#$ zCKBSOD3>TM%&1T5t&#FK@|@1f)Ze+EE6(7`}J(Ek4})CD@I+W;L{ zO>K;wokKMA)EC6C|D@nz%D2L3U=Nm(qc>e4GM3WsHGu-T?l^PV6m-T-(igun?PZ8U z{qbiLDMcGSF1`FiKhlsV@qPMRm~h9@z3DZmWp;Suh%5BdP6jqHn}$-gu`_xNg|j{PSJ0n$ zbE;Azwq8z6IBlgKIEKc4V?*##hGW#t*rh=f<;~RFWotXS$vr;Mqz>A99PMH3N5BMi zWLNRjc57*z`2)gBV0o4rcGM(u*EG8_H5(|kThAnp|}u2xz>>X6tN zv)$|P2Nr1D*fk4wvqf(7;NmdRV3eL{!>DO-B98(s*-4$g{)EnRYAw+DP-C`=k)B!* zHU7!ejcbavGCYuz9k@$aZQaU%#K%6`D}=N_m?~^)IcmQZun+K)fSIoS>Ws zwvZ%Rfmw>%c!kCd~Pmf$E%LCj2r>+FzKGDm+%u88|hHprot{*OIVpi`Vd^^aumtx2L}h} zPu$v~zdHaWPF<`LVQX4i7bk82h#RwRyORx*z3I}o&>>eBDCif%s7&*vF6kU%1` zf(bvILch^~>cQ{=Y#?nx(8C-Uuv7!2_YeCfo?zkP;FK zX+KdjKS;HQ+7 zj>MCBI=d$~9KDJ1I2sb_3=T6D+Mu9{O&vcTnDA(I#<=L8csjEqsOe=&`=QBc7~>u2 zfdcO44PUOST%PcN+8PzKFYoR0;KJ$-Nwu#MgSM{_!?r&%rVM}acp>53if|vpH)q=O z;6uAi__am8g$EjZ33?PmCrg@(M!V_@(^+#wAWNu&e3*pGlfhF2<3NobAC zlusz>wMV--3ytd@S047g)-J@eOD;DMnC~@zvS=Gnw3=LnRzkeV`LH4#JGPklE4!Q3 zq&;|yGR0FiuE-|&1p2g{MG!Z3)oO9Jf4@0h*3!+RHv=SiEf*oGQCSRQf=LqT5~sajcJ8XjE>E*@q$n z!4|Rz%Lv8TgI23JV6%)N&`Otk6&RBdS|lCe7+#yAfdyEWNTfFb&*S6-;Q}d`de!}*3vM(z71&3 z37B%@GWjeQ_$lr%`m-8B&Zl4Gv^X{+N{GCsQGr!LLU4SHmLt3{B*z-HP{73G8u>nK zHxNQ4eduv>lARQfULUtIlLx#7ea+O;w?LH}FF28c9pg#*M`pB~{jQmPB*gA;Hik#e zZpz&X#O}}r#O_#oSr4f`zN^wedt>ST791bAZ5(=g<Oj)m9X8J^>Th}fznPY0T zsD9ayM7Hrlb6?jHXL<{kdA*Q#UPCYce0p`fHxoZ7_P`cF-$1YY9Pi;0QFt{CCf%C# zuF60A_NTstTQeFR3)O*ThlWKk08}7Nshh}J-sGY=gzE!?(_ZI4ovF6oZ$)&Zt~WZi z_0@Bk!~R4+<&b6CjI{nGj+P{*+9}6;{RwZ7^?H)xjhiRi;?A|wb0UxjPr?L@$^v|0= z@6d3+eU|&re3+G*XgFS}tih3;>2-R1x>`2hmUb5+Z~eM4P|$ zAxvE$l@sIhf_#YLnF|Wcfp(Gh@@dJ-yh|FhKqsyQp_>7j1)w|~5OKETx2P$~`}5huK;{gw_~HXP6=RsG)FKSZ=VYkt+0z&D zr?`R3bqVV?Zmqj&PQ`G3b^PIrd{_K|Hhqt zAUS#|*WpEOeZ{@h*j6%wYsrL`oHNV=z*^}yT1NCTgk1-Gl(&+TqZhODTKb9|0$3;| z;{UUq7X9Oz`*gwbi|?&USWH?Fr;6=@Be4w=8zu>DLUsrwf+7A`)lpdGykP`^SA8{ok{KE3sM$N@l}kB2GDe7MEN? 
zWcQ2I0fJ1ZK%s-YKk?QbEBO6`C{bg$%le0FTgfmSan-Kih0A7)rGy|2gd)_gRH7qp z*bNlP0u|S^5<)kFcd&wQg*6QP5;y(3ZgI%vUgWk#`g!sMf`02>@xz{Ie9_-fXllyw zh>P%cK+-HkQ;D$Jh=ig(ASN^zJ7|q*#m;}2M*T#s0a^nF_>jI(L(|*}#|$O&B^t!W zv-^-vP)kuu+b%(o3j)B@do)n*Y0x%YNy`sYj*-z2ncYoggD6l z6{1LndTQUh+GCX;7rCrT z@=vy&^1zyl{#7vRPv;R^PZPaIks8okq)To8!Cks0&`Y^Xy5iOWC+MmCg0Jl?1ufXO zaK8Q5IO~J&E|<;MnF_oXLc=LU#m{6yeomA^Ood;)fEqGPeD|fJiz(`OHF_f*{oWJq z1_$NF&Mo7@GKae#f4AD|KIkGVi~ubOj1C>>WCpQq>MeDTR_2xL01^+K1+ zr$}J>d=fW{65hi2bz&zqRKs8zpDln z*7+Gtfz6rkgfj~#{MB=49FRP;ge*e0=x#czw5N{@T1{EAl;G&@tpS!+&2&Stf<%<+55R18u2%+}`?PZo8xg|Y9Xli(fSQyC7 z+O5{;ZyW$!eYR~gy>;l6cA+e`oXN6a6t(&kUkWus*Kf<m$W7L)w5uXYF)->OeWMSUVXi;N#sY zvz4c?GkBU{D;FaQ)9|HU7$?BX8DFH%hC11a@6s4lI}y{XrB~jd{w1x&6bD?gemdlV z-+ZnCcldFanu`P=S0S7XzwXO(7N9KV?AkgZzm|J&f{l-Dp<)|-S7?*@HBIfRxmo1% zcB4`;Al{w-OFD08g=Qochf9=gb56_FPc{C9N5UAjTcJ(`$>)wVhW=A<8i#!bmKD#6~wMBak^2(p56d2vs&O6s4>#NB0UVr24K z%cw|-Yv}g5`_zcEqrZBaRSoBm;BuXJM^+W$yUVS9?u(`87t)IokPgC_bQ3g_#@0Yg zywb?u{Di7zd3XQ$y!m^c`6~t-7@g-hwnTppbOXckS-^N?w1`kRMpC!mfMY?K#^Ldm zYL>771%d{+iqh4a&4RdLNt3_(^^*{U2!A>u^b{7e@}Azd_PiZ>d~(@(Q@EYElLAx3LgQ5(ZUf*I%EbGiBTG!g#=t zXbmPhWH`*B;aZI)$+PWX+W)z?3kTOi{2UY9*b9bpSU!GWcVu+)!^b4MJhf=U9c?jj z%V)EOF8X3qC5~+!Pmmmd@gXzbycd5Jdn!N#i^50a$4u}8^O}DG2$w-U|8QkR-WU1mk4pF z#_imS#~c2~Z{>!oE?wfYc+T+g=eJL`{bL6=Gf_lat2s=|RxgP!e#L|6XA8w{#(Po(xk1~rNQ4UiG``U`eKy7`ot;xv4 zdv54BHMXIq;#^B%W(b8xt%JRueW5PZsB2eW=s3k^Pe1C$-NN8~UA~)=Oy->22yJ%e zu=(XD^5s{MkmWB)AF_qCFf&SDH%ytqpt-jgs35XK8Ez5FUj?uD3++@2%*9+-65LGQ zvu1eopeQoFW98@kzU{+He9$Yj#`vaQkqu%?1wCoBd%G=)TROYl2trZa{AZ@#^LARR zdzg-?EUnt9dK2;W=zCcVj18RTj-%w^#pREbgpD0aL@_v-XV2&Cd@JB^(}GRBU}9gV z6sWmVZmFZ9qrBN%4b?seOcOdOZ+6cx8-#R(+LYKJu~Y%pF5#85aF9$MnP7r^Bu%D? 
zT{b-KBujiy>7_*9{8u0|mTJ(atnnnS%qBDM_Gx5>3V+2~Wt=EeT4cXOdud$+weM(>wdBg+cV$}6%(ccP;`!~CzW{0O2aLY z?rQtBB6`ZztPP@_&`kzDzxc==?a{PUPUbbX31Vy?_(;c+>3q*!df!K(LQYZNrZ>$A*8<4M%e8vj1`%(x9)d~);ym4p zoo518$>9Pe| zZaFGj);h?khh*kgUI-Xvj+Dr#r&~FhU=eQ--$ZcOY9;x%&3U(&)q}eJs=)K5kUgi5 zNaI-m&4?wlwFO^`5l-B?17w4RFk(IKy5fpS0K%txp0qOj$e=+1EUJbLd-u>TYNna~ z+m?gU0~xlcnP>J>%m_y_*7hVMj3d&)2xV8>F%J;6ncm)ILGzF2sPAV|uYk5!-F%jL(53^51BKr zc3g7+v^w<4WIhk7a#{N6Ku_u{F`eo;X+u!C(lIaiY#*V5!sMed39%-AgV*`(nI)Im zemHE^2foBMPyIP<*yuD21{6I?Co?_{pqp-*#N6sZRQAzEBV4HQheOyZT5UBd)>G85 zw^xHvCEP4AJk<{v2kQQ;g;C)rCY=X!c8rNpNJ4mHETN}t1rwSe7=s8u&LzW-+6AEB z)LX0o7`EqC94HM{4p}d2wOwj2EB|O;?&^FeG9ZrT%c!J&x`Z3D2!cm(UZbFBb`+h ztfhjq75yuSn2~|Pc)p$Ul6=)}7cfXtBsvc15f&(K{jnEsw5Gh0GM^O=JC+X-~@r1kI$=FH=yBzsO#PxR1xU9+T{KuPx7sMe~GX zSP>AT3%(Xs@Ez**e@GAn{-GvB^oa6}5^2s+Mg~Gw?#$u&ZP;u~mP|FXsVtr>3k9O?%v>`Ha-3QsOG<7KdXlqKrsN25R|K<<;- z8kFY!&J&Yrqx3ptevOHiqPxKo_wwAPD)$DWMz{0>{T5qM%>rMqGZ!dJdK(&tP1#89 zVcu}I1I-&3%nMyF62m%MDpl~p)PM(%YoR zD)=W)E7kjwzAr!?^P*`?=fMHd1q4yjLGTTRUidem^Ocjrfgk2Jp|6SabEVHKC3c>RX@tNx=&Z7gC z0ztZoZx+#o36xH8mv6;^e{vU;G{JW17kn(RO&0L%q^fpWSYSkr1Cb92@bV->VO5P z;=V{hS5wcROQfbah6ND{2a$zFnj>@yuOcw}X~E20g7)5=Z#(y)RC878{_rObmGQ;9 zUy>&`YT^2R@jqR1z9Fx&x)WBstIE#*UhAa>WrMm<10={@$UN@Cog+#pxq{W@l0DOf zJGs^Jv?t8HgIXk(;NFHXun$J{{p})cJ^BWn4BeQo6dMNp%JO@$9z{(}qqEHuZOUQP zZiwo70Oa@lMYL(W*R4(!oj`)9kRggJns-A|w+XL=P07>QBMTEbG^gPS)H zu^@MFTFZtsKGFHgj|hupbK({r>PX3_kc@|4Jdqr@gyyKrHw8Tu<#0&32Hh?S zsVm_kQ2K`4+=gjw1mVhdOz7dI7V!Iu8J1LgI+_rF`Wgx5-XwU~$h>b$%#$U3wWC-ea0P(At2SjPAm57kd;!W5k{do1}X681o}`!c*(w!kCjtGTh7`=!M)$9 zWjTns{<-WX+Xi;&d!lyV&1KT9dKL??8)fu2(?Ox<^?EAzt_(#5bp4wAfgIADYgLU` z;J7f8g%-tfmTI1ZHjgufKcAT4SO(vx?xSo4pdWh`3#Yk;DqPGQE0GD?!_CfXb(E8WoJt6*Yutnkvmb?7H9B zVICAYowwxK;VM4(#~|}~Ooyzm*1ddU_Yg%Ax*_FcZm^AzYc$<+9bv;Eucr(SSF}*JsjTfb*DY>qmmkt z;dRkB#~SylP~Jcmr&Bl9TxHf^DcGUelG%rA{&s)5*$|-ww}Kwx-lWnNeghVm@z zqi3@-oJnN%r2O4t9`5I5Zfc;^ROHmY6C9 z1VRRX*1+aBlbO_p>B+50f1p&%?_A*16R0n+l}HKWI$yIH3oq2`k4O?tEVd~a4~>iI 
zo{d}b8tr+$q<%%K%Ett*i|RAJEMnk9hU7LtL!lxOB45xO1g)ycDBd=NbpaE3j?Gw& z0M&xx13EkCgNHu%Z8rBLo93XH-zQUfF3{Iy>65-KSPniqIzF+?x$3>`L?oBOBeEsv zs_y7@7>IbS&w2Vju^#vBpPWQuUv=dDRGm(-MH|l+8T?vfgD;{nE_*-h?@D;GN>4hA z9{!G@ANfHZOxMq5kkoh4h*p3+zE7z$13ocDJR$XA*7uKtG5Cn_-ibn%2h{ z;J0m5aCjg(@_!G>i2FDAvcn5-Aby8b;J0u%u)!`PK#%0FS-C3(cq9J{V`DJEbbE|| zYpTDd+ulcjEd5`&v!?=hVgz&S0|C^We?2|>9|2T6?~nn^_CpLn&kuI|VG7_E{Ofu9 zAqe0Reuq5Zunlx@zyTqEL+ssT15X|Z0LUfZAr-i$1_SJ{j}BHmBm}s8{OgK3lm%4F zzC%jz!y!8WUJo2FLkU(mVh7-uzC+gcbkV^bM}&Y6=HTTca{!7ZSoB!)l|v<(3ly!jq&P5A2q(U5~h)))aj-`-6&aM~LBySnAy zA0{Z{FHiUb8rW|Yo%kQwi`Kh>EEE$0g7UxeeeVkcY%~87yCmSjYyxoqq(%Jib*lH; zz`t5y094U`k_o{-*U^dFH~+1I@GsgwqmGsQC9-Vr0X94TLhlV;Kt#`9h-N?oKHqpx zzVAOxltd%gzb_Qu{NHnE8vPp=G$#S)Y%&6drobF_#NeY%VLzeod delta 9041 zcmY*t@kVBCBP!g$Qih>$!M(|j-I?-C8+=cK0w!?cVWy9LXH zd%I}(h%K_>9Qvap&`U=={XcolW-VA%#t9ljo~WmY8+Eb|zcKX3eyx7qiuU|a)zU5cYm5{k5IAa3ibZf_B&=YT!-XyLap%QRdebT+PIcg$KjM3HqA3uZ5|yBj2vv8$L{#$>P=xi+J&zLILkooDarGpiupEiuy`9uy&>yEr95d)64m+~`y*NClGrY|5MLlv!)d5$QEtqW)BeBhrd)W5g1{S@J-t8_J1 zthp@?CJY}$LmSecnf3aicXde(pXfeCei4=~ZN=7VoeU|rEEIW^!UBtxGc6W$x6;0fjRs7Nn)*b9JW5*9uVAwi) zj&N7W;i<Qy80(5gsyEIEQm>_+4@4Ol)F?0{YzD(6V~e=zXmc2+R~P~< zuz5pju;(akH2+w5w!vnpoikD5_{L<6T`uCCi@_Uorr`L(8zh~x!yEK*!LN02Q1Iri z>v*dEX<(+_;6ZAOIzxm@PbfY4a>ws4D82&_{9UHCfll!x`6o8*i0ZB+B#Ziv%RgtG z*S}<4!&COp)*ZMmXzl0A8mWA$)fCEzk$Wex*YdB}_-v|k9>jKy^Y>3me;{{|Ab~AL zQC(naNU=JtU3aP6P>Fm-!_k1XbhdS0t~?uJ$ZvLbvow10>nh*%_Kh>7AD#IflU8SL zMRF1fmMX#v8m=MGGb7y5r!Qf~Y}vBW}fsG<{1CHX7Yz z=w*V9(vOs6eO>CDuhurDTf3DVVF^j~rqP*7S-$MLSW7Ab>8H-80ly;9Q0BWoNV zz8Wr2CdK!rW0`sMD&y{Ue{`mEkXm0%S2k;J^iMe|sV5xQbt$ojzfQE+6aM9LWH`t& z8B;Ig7S<1Dwq`3W*w59L(opjq)ll4E-c?MivCh!4>$0^*=DKI&T2&j?;Z82_iZV$H zKmK7tEs7;MI-Vo(9wc1b)kc(t(Yk? 
z#Hgo8PG_jlF1^|6ge%;(MG~6fuKDFFd&}>BlhBTh&mmuKsn>2buYS=<5BWw^`ncCb zrCRWR5`IwKC@URU8^aOJjSrhvO>s}O&RBD8&V=Fk2@~zYY?$qO&!9%s>YecVY0zhK zBxKGTTyJ(uF`p27CqwPU1y7*)r}y;{|0FUO)-8dKT^>=LUoU_6P^^utg|* zuj}LBA*gS?4EeEdy$bn#FGex)`#y|vg77NVEjTUn8%t z@l|7T({SM!y$PZy9lb2N;BaF}MfGM%rZk10aqvUF`CDaC)&Av|eED$x_;qSoAka*2 z2rR+OTZTAPBx`vQ{;Z{B4Ad}}qOBqg>P4xf%ta|}9kJ2$od>@gyC6Bf&DUE>sqqBT zYA>(sA=Scl2C_EF8)9d8xwdBSnH5uL=I4hch6KCHj-{99IywUD{HR`d(vk@Kvl)WD zXC(v{ZTsyLy{rio*6Wi6Lck%L(7T~Is-F_`2R}q z!H1ylg_)Mv&_|b1{tVl!t{;PDa!0v6^Zqs_`RdxI%@vR)n|`i`7O<>CIMzqI00y{;` zhoMyy>1}>?kAk~ND6}`qlUR=B+a&bvA)BWf%`@N)gt@@Ji2`p1GzRGC$r1<2KBO3N z++YMLD9c|bxC;za_UVJ*r6&Ea;_YC>-Ebe-H=VAgDmx+?Q=DxCE4=yQXrn z7(0X#oIjyfZUd}fv2$;4?8y|0!L^ep_rMz|1gU-hcgVYIlI~o>o$K&)$rwo(KJO~R zDcGKo-@im7C<&2$6+q-xtxlR`I4vL|wFd<`a|T}*Nt;(~Vwx&2QG_j$r0DktR+6I4W)gUx*cDVBwGe00aa803ZYiwy;d{1p)y0?*IT8ddPS`E~MiS z1d%Vm0Hb4LN2*f8FZ|6xRQev@ZK-?(oPs+mT*{%NqhGL_0dJ$?rAxA{2 z`r3MBv&)xblcd>@hArncJpL~C(_HTo&D&CS!_J5Giz$^2EfR_)xjgPg`Bq^u%1C*+ z7W*HGp|{B?dOM}|E)Cs$61y8>&-rHBw;A8 zgkWw}r$nT%t(1^GLeAVyj1l@)6UkHdM!%LJg|0%BO74M593&LlrksrgoO{iEz$}HK z4V>WXgk|7Ya!Vgm#WO^ZLtVjxwZ&k5wT6RteViH3ds{VO+2xMJZ`hToOz~_+hRfY{ z%M;ZDKRNTsK5#h6goUF(h#VXSB|7byWWle*d0$IHP+FA`y)Q^5W!|&N$ndaHexdTn z{vf?T$(9b&tI&O`^+IqpCheAFth;KY(kSl2su_9|Y1B{o9`mm)z^E`Bqw!n+JCRO) zGbIpJ@spvz=*Jki{wufWm|m`)XmDsxvbJR5dLF=kuf_C>dl}{nGO(g4I$8 zSSW#5$?vqUDZHe_%`Zm?Amd^>I4SkBvy+i}wiQYBxj0F1a$*%T+6}Yz?lX&iQ}zaU zI@%8cwVGtF3!Ke3De$dL5^j-$Bh3+By zrSR3c2a>XtaE#TB}^#hq@!vnZ1(An#bk_eKR{?;Z&0cgh4$cMNU2HL=m=YjMTI zT$BRltXs4T=im;Ao+$Bk3Dz(3!C;rTqelJ?RF)d~dP9>$_6dbz=_8#MQFMMX0S$waWxY#mtDn}1U{4PGeRH5?a>{>TU@1UlucMAmzrd@PCwr|il)m1fooO7Z{Vyr z6wn=2A5z(9g9-OU10X_ei50@~)$}w4u)b+mt)z-sz0X32m}NKTt4>!O{^4wA(|3A8 zkr(DxtMnl$Hol>~XNUE?h9;*pGG&kl*q_pb z&*$lH70zI=D^s)fU~A7cg4^tUF6*Oa+3W0=7FFB*bf$Kbqw1&amO50YeZM)SDScqy zTw$-M$NA<_We!@4!|-?V3CEPnfN4t}AeM9W$iSWYz8f;5H)V$pRjMhRV@Z&jDz#FF zXyWh7UiIc7=0U9L35=$G54RjAupR&4j`(O3i?qjOk6gb!WjNtl1Fj-VmltDTos-Bl 
z*OLfOleS~o3`?l!jTYIG!V7?c<;Xu(&#~xf-f(-jwow-0Hv7JZG>}YKvB=rRbdMyv zmao*-!L?)##-S#V^}oRm7^Db zT5C2RFY4>ov~?w!3l_H}t=#X=vY-*LQy(w>u%r`zQ`_RukSqIv@WyGXa-ppbk-X=g zyn?TH(`-m*in(w=Ny$%dHNSVxsL|_+X=+kM+v_w{ZC(okof9k1RP5qDvcA-d&u{5U z?)a9LXht1f6|Tdy5FgXo;sqR|CKxDKruU9RjK~P6xN+4;0eAc|^x%UO^&NM4!nK_! z6X14Zkk=5tqpl&d6FYuMmlLGQZep0UE3`fT>xzgH>C*hQ2VzCQlO`^kThU6q%3&K^ zf^kfQm|7SeU#c%f8e?A<9mALLJ-;)p_bv6$pp~49_o;>Y=GyUQ)*prjFbkU;z%HkOW_*a#j^0b@GF|`6c}7>=W{Ef!#dz5lpkN>@IH+(sx~QMEFe4 z1GeKK67;&P%ExtO>}^JxBeHii)ykX8W@aWhJO!H(w)DH4sPatQ$F-Phiqx_clj`9m zK;z7X6gD2)8kG^aTr|oY>vmgOPQ4`_W+xj2j!$YT9x(DH6pF~ zd_C#8c>Gfb)k2Ku4~t=Xb>T^8KW;2HPN#%}@@hC1lNf~Xk)~oj=w-Y11a@DtIyYk8 z9^|_RIAA(1qUSs3rowxr&OuRVFL8(zSqU_rGlqHpkeYT4z7DGdS0q4V-b!3fsv$Yb zPq4UP^3XFd(G%JAN|0y>?&sLzNir30K(lyzNYvCtE2gDyy-nthPlrXXU75fhoS7kA zg%GYyBEFQ(xgdjtv+>?>Q!G!8& z3+F>)4|N+F1a^T?XC8 zxRRx7-{DV%uUYt&*$z2uQTbZDbUn)PozID*(i^{JDjNq`v?;&OW^&~{ZPE_e+?RMk z!7O5CUKJSnGZvjTbLX2$zwYRZs_$f{T!hvVHuTg77|O;zBHlA|GIUu_bh4`Bl?7KE zYB~a`b?O;0SfD?0EZiPYpVf=P4=|zr(u_w}oP0S`YOZziX9cuwpll&%QMv4bBC_JdP#rT3>MliqySv0& zh)r=vw?no&;5T}QVTkHKY%t`%{#*#J;aw!wPs}?q2$(e0Y#cdBG1T09ypI@#-y24+fzhJem1NSZ$TCAjU2|ebYG&&6p(0f>wQoNqVa#6J^W!3$gIWEw7d<^k!U~O5v=8goq$jC`p8CS zrox#Jw3w`k&Ty7UVbm35nZ}FYT5`fN)TO6R`tEUFotxr^BTXZGt|n(Ymqmr^pCu^^w?uX!ONbm?q{y9FehdmcJuV8V%A-ma zgl=n9+op{wkj-}N;6t;(JA1A#VF3S9AFh6EXRa0~7qop~3^~t1>hc6rdS_4!+D?Xh z5y?j}*p@*-pmlTb#7C0x{E(E@%eepK_YycNkhrYH^0m)YR&gRuQi4ZqJNv6Rih0zQ zqjMuSng>Ps;?M0YVyh<;D3~;60;>exDe)Vq3x@GRf!$wgFY5w4=Jo=g*E{76%~jqr zxTtb_L4Cz_E4RTfm@0eXfr1%ho?zP(>dsRarS>!^uAh~bd0lEhe2x7AEZQmBc%rU; z&FUrs&mIt8DL`L4JpiFp3NNyk3N>iL6;Nohp*XbZZn%BDhF_y{&{X3UtX(7aAyG63P zELC;>2L`jnFS#vC->A(hZ!tGi7N7^YtW7-LB6!SVdEM&7N?g}r4rW2wLn{Ni*I~$Y z@#;KwJIl0^?eX{JWiHQxDvccnNKBhHW0h6`j=)OH1`)7)69B$XNT@)l1s25M+~o2_ zpa&X<_vHxN_oR|B#ir2p*VNB~o6Z1OE&~a+_|AxS)(@Dgznq(b(|K8BN_nQ7+>N`= zXOx_@AhcmmcRvp6eX#4z6sn=V0%KonKFVY@+m&)Rx!Z5U@WdyHMCF4_qzJNpzc9Fw z7Bdzx54(e7>wcEqHKqH-Paiut;~ZVJpS6_q>ub)zD#TQ4j*i(I8DvS$BfyX~A%<#} z*=g2$8s;YYjEHl`7cKw!a9PFRt8tVR 
zM&X|bs?B1#ycjl>AzgbdRkr-@NmBc^ys)aoT75F(yweV&Y-3hNNXj-valA&=)G{NL zX?smr5sQWi3n;GGPW{%vW)xw-#D0QY%zjXxYj?($b4JzpW0sWY!fkwC5bJMkhTp$J z6CNVLd=-Ktt7D<^-f|=wjNjf0l%@iu2dR+zdQ&9NLa(B_okKdRy^!Q!F$Ro=hF$-r z!3@ocUs^7?cvdTMPbn*8S-o!PsF;>FcBkBkg&ET`W`lp?j`Z}4>DF|}9407lK9y~^No&pT7J|rVQ9Dh>qg|%=gxxg=! z>WX$!;7s~gDPmPF<--(?CvEnvV*E1KdXpr>XVv!DN~PyISE7d+K_9+W^pnR6cX&?E ziLr{0`JIs@NcA|;8L|p!3H~9y8mga2Dsm4I?rBS7$3wcT!_l*$^8U3hKUri|_I3N2 zz$xY`)IWA7P*Y1BJtyBEh?8EEvs8Oyl^{(+`gi{9hwpcN#I%Z0j$^yBp?z<;Ny!G$ zra3J_^i0(~LiKuITs%v)qE+YrJr?~w+)`Rcte^O=nwmPg@&!Q7FGTtjpTdI6wH&ZV z)2}VZY6(MbP`tgoew++(pt$jVj- zvPK)pSJ)U(XfUqBqZNo|za#Xx+IVEb?HGQ^wUVH&wTdWgP(z#ijyvXjwk>tFBUn*2 zuj5ENQjT{2&T`k;q54*Z>O~djuUBNwc6l(BzY?Ed4SIt9QA&8+>qaRIck?WdD0rh@ zh`VTZPwSNNCcLH3J}(q zdEtu@HfxDTpEqWruG=86m;QVO{}E&q8qYWhmA>(FjW`V&rg!CEL1oZCZcAX@yX(2tg8`>m1psG0ZpO+Rnph@Bhjj!~|+S=@+U{*ukwGrBj{5xfIHHP7|} z^7@g2;d%FMO8f(MS&6c##mrX2i(5uiX1o(=Vw89IQcHw)n{ZTS@``xT$Af@CQTP#w zl3kn6+MJP+l(;K-rWgjpdBU|CB4>W%cObZBH^Am~EvRO%D>uU^HVRXi$1 zb?Pr~ZlopLfT5l%03SjI7>YiGZZs=n(A!c;N9%%aByY~5(-hS4z_i2wgKYsG%OhhxH#^5i%&9ESb(@# zV_f5${Gf=$BK)1VY=NX#f+M}6f`OWmpC*OU3&+P@n>$Xvco*Nm$c<=`S|lY6S}Ut- z80}ztIpkV>W%^Ox`enpk<25_i7`RPiDugxHfUDBD8$bp9XR15>a?r^#&!1Ne6n{MI z){H`!jwrx}8b-w@@E8H0v)l!5!W8En=u67v+`iNoz<_h4{V*qQK+@)JP^JqsKAedZ zNh4toE+I7;^}7kkj|hzNVFWkZ$N9rxPl9|_@2kbW*4}&o%(L`WpQCN2M?gz>cyWHk zulMwRxpdpx+~P(({@%UY20LwM7sA&1M|`bEoq)Id zyUHt>@vfu**UOL9wiW*C75cc&qBX37qLd`<;$gS+mvL^v3Z8i4p6(@Wv`N|U6Exn< zd`@WxqU^8u^Aw+uw#vuDEIByaD)vucU2{4xRseczf_TJXUwaUK+E_IoItXJq88${0 z=K5jGehPa2)CnH&Lcxv&1jQ=T8>*vgp1^%)c&C2TL69;vSN)Q)e#Hj7!oS0 zlrEmJ=w4N9pID5KEY5qz;?2Q}0|4ESEio&cLrp221LTt~j3KjUB`LU?tP=p;B=WSXo;C?8(pnF6@?-ZD0m3DYZ* z#SzaXh|)hmTC|zQOG>aEMw%4&2XU?prlk5(M3ay-YC^QLRMN+TIB*;TB=wL_atpeD zh-!sS%A`3 z=^?niQx+^za_wQd2hRR=hsR0uzUoyOcrY!z7W)G2|C-_gqc`wrG5qCuU!Z?g*GL^H z?j^<_-A6BC^Dp`p(i0!1&?U{YlF@!|W{E@h=qQ&5*|U~V8wS;m!RK(Q6aX~oH9ToE zZYKXZoRV~!?P1ADJ74J-PFk2A{e&gh2o)@yZOZuBi^0+Hkp`dX;cZs9CRM+##;P!*BlA%M48TuR 
zWUgfD1DLsLs+-4XC>o>wbv-B)!t*47ON5wgoMX%llnmXG%L8209Vi;yZ`+N2v2Ox+ zMe7JHunQE$ckHHhEYRA+e`A3=XO5L%fMau71`XL7v)b{f1rkTY+WWSIkH#sG=pLqe zA(xZIp>_=4$zKq0t_G7q9@L zZ5D-0{8o%7f>0szA#c;rjL;4Y%hl}wYrx1R`Viq|Pz}c-{{LJY070ym@E~mt*pTyG z79bfcWTGGEje;PLD;N-XHw=`wS^howfzb$%oP8n)lN$o$ZWjZx|6iSsi2piI_7s7z zX#b$@z6kIJ^9{-Y^~wJ!s0V^Td5V7#4&pyU#NHw#9)N&qbpNFDR1jqC00W}91OnnS z{$J@GBz%bka`xsz;rb_iJ|rgmpUVyEZ)Xi*SO5U&|NFkTHb3y@e@%{WrvE&Jp#Lw^ zcj13CbsW+V>i@rj@SEfFf0@yjS@nbPB0)6D`lA;e%61nh`-qhydO!uS7jXGQd%i7opEnOL;| zDn!3EUm(V796;f?fA+RDF<@%qKlo)`0VtL74`!~516_aogYP%QfG#<2kQ!pijthz2 zpaFX3|D$%C7!bL242U?-e@2QZ`q$~lgZbvgfLLyVfT1OC5<8@6lLi=A{stK#zJmWd zlx+(HbgX)l$RGwH|2rV@P3o@xCrxch0$*z1ASpy(n+d4d2XWd~2AYjQm`xZU3af8F p+x$Nxf1895@0bJirXkdpJh+N7@Nb7x007(DEB&^Lm}dWn{T~m64-^0Z diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index ac0b842..8fad3f5 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index 1b6c787..a69d9cb 100755 --- a/gradlew +++ b/gradlew @@ -205,6 +205,12 @@ set -- \ org.gradle.wrapper.GradleWrapperMain \ "$@" +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + # Use "xargs" to parse quoted args. # # With -n1 it outputs one arg per line, with the quotes and backslashes removed. diff --git a/gradlew.bat b/gradlew.bat index ac1b06f..53a6b23 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -14,7 +14,7 @@ @rem limitations under the License. 
@rem -@if "%DEBUG%" == "" @echo off +@if "%DEBUG%"=="" @echo off @rem ########################################################################## @rem @rem Gradle startup script for Windows @@ -25,7 +25,7 @@ if "%OS%"=="Windows_NT" setlocal set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. +if "%DIRNAME%"=="" set DIRNAME=. set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% @@ -40,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto execute +if %ERRORLEVEL% equ 0 goto execute echo. echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. @@ -75,13 +75,15 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar :end @rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd +if %ERRORLEVEL% equ 0 goto mainEnd :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% :mainEnd if "%OS%"=="Windows_NT" endlocal diff --git a/oai-client/build.gradle b/oai-client/build.gradle index d1311c6..051227c 100644 --- a/oai-client/build.gradle +++ b/oai-client/build.gradle @@ -1,5 +1,5 @@ dependencies { api project(':oai-common') - testImplementation "org.xbib:marc:${project.property('xbib-marc.version')}" - testImplementation "org.xbib:bibliographic-character-sets:${project.property('xbib-bibliographic-character-sets.version')}" + testImplementation libs.marc + testImplementation libs.charactersets } diff --git a/oai-client/src/main/java/module-info.java b/oai-client/src/main/java/module-info.java index 7620b78..06fcbb0 100644 --- a/oai-client/src/main/java/module-info.java +++ b/oai-client/src/main/java/module-info.java @@ -7,7 +7,7 @@ module org.xbib.oai.client { 
exports org.xbib.oai.client.listrecords; exports org.xbib.oai.client.listsets; requires org.xbib.oai; - requires org.xbib.net.url; + //requires org.xbib.net; requires org.xbib.content.xml; requires java.xml; requires java.logging; diff --git a/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java b/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java index e54cef4..ccaf8ca 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java +++ b/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java @@ -1,11 +1,11 @@ package org.xbib.oai.client; -import org.xbib.net.URL; import org.xbib.oai.OAIConstants; import org.xbib.oai.client.identify.IdentifyRequest; import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.client.listrecords.ListRecordsRequest; import org.xbib.oai.client.listrecords.ListRecordsResponse; +import org.xbib.oai.client.util.UrlBuilder; import org.xbib.oai.exceptions.NoRecordsMatchException; import org.xbib.oai.util.ResumptionToken; import org.xbib.oai.xml.MetadataHandler; @@ -73,7 +73,7 @@ public class OAIClient { public IdentifyResponse identify() throws IOException, InterruptedException { IdentifyRequest identifyRequest = new IdentifyRequest(); IdentifyResponse identifyResponse = new IdentifyResponse(); - URL.Builder url = URL.from(baseURL).mutator(); + UrlBuilder url = UrlBuilder.fromUrl(baseURL); identifyRequest.getParams().forEach(url::queryParam); HttpRequest httpRequest = HttpRequest.newBuilder() .uri(URI.create(url.build().toExternalForm())) @@ -180,7 +180,7 @@ public class OAIClient { listRecordsRequest.addHandler(handler); } ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest); - URL.Builder url = URL.from(baseURL).mutator(); + UrlBuilder url = UrlBuilder.fromUrl(baseURL); // kind of hacky here - suppress all OAI params if resumption token is present if (listRecordsRequest.getResumptionToken() == null) { listRecordsRequest.getParams().forEach(url::queryParam); @@ 
-188,6 +188,7 @@ public class OAIClient { url.queryParam(OAIConstants.VERB_PARAMETER, OAIConstants.LIST_RECORDS); url.queryParam(OAIConstants.RESUMPTION_TOKEN_PARAMETER, listRecordsRequest.getResumptionToken().toString()); } + URI uri = URI.create(url.build().toExternalForm()); HttpRequest httpRequest = HttpRequest.newBuilder() .uri(uri) diff --git a/oai-client/src/main/java/org/xbib/oai/client/util/PercentDecoder.java b/oai-client/src/main/java/org/xbib/oai/client/util/PercentDecoder.java new file mode 100755 index 0000000..4cef1fd --- /dev/null +++ b/oai-client/src/main/java/org/xbib/oai/client/util/PercentDecoder.java @@ -0,0 +1,196 @@ +package org.xbib.oai.client.util; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.MalformedInputException; +import java.nio.charset.UnmappableCharacterException; + +import static java.nio.charset.CoderResult.OVERFLOW; +import static java.nio.charset.CoderResult.UNDERFLOW; + +/** + * Decodes percent-encoded (%XX) Unicode text. + */ +public final class PercentDecoder { + + /** + * bytes represented by the current sequence of %-triples. Resized as needed. + */ + private ByteBuffer encodedBuf; + + /** + * Written to with decoded chars by decoder + */ + private final CharBuffer decodedCharBuf; + private final CharsetDecoder decoder; + + /** + * The decoded string for the current input + */ + private final StringBuilder outputBuf = new StringBuilder(); + + /** + * Construct a new PercentDecoder with default buffer sizes. 
+ * + * @param charsetDecoder Charset to decode bytes into chars with + * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int) + */ + public PercentDecoder(CharsetDecoder charsetDecoder) { + this(charsetDecoder, 16, 16); + } + + /** + * @param charsetDecoder Charset to decode bytes into chars with + * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes + * @param decodedCharBufSize Size of buffer that encoded bytes are decoded into + */ + public PercentDecoder(CharsetDecoder charsetDecoder, int initialEncodedByteBufSize, + int decodedCharBufSize) { + encodedBuf = ByteBuffer.allocate(initialEncodedByteBufSize); + decodedCharBuf = CharBuffer.allocate(decodedCharBufSize); + decoder = charsetDecoder; + } + + /** + * @param input Input with %-encoded representation of characters in this instance's configured character set, e.g. + * "%20" for a space character + * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters + * @throws MalformedInputException if decoder is configured to report errors and malformed input is detected + * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is + * detected + */ + public String decode(CharSequence input) throws MalformedInputException, UnmappableCharacterException { + outputBuf.setLength(0); + // this is almost always an underestimate of the size needed: + // only a 4-byte encoding (which is 12 characters input) would case this to be an overestimate + outputBuf.ensureCapacity(input.length() / 8); + encodedBuf.clear(); + + for (int i = 0; i < input.length(); i++) { + char c = input.charAt(i); + if (c != '%') { + handleEncodedBytes(); + + outputBuf.append(c); + continue; + } + + if (i + 2 >= input.length()) { + throw new IllegalArgumentException( + "Could not percent decode <" + input + ">: incomplete %-pair at position " + i); + } + + // grow the byte buf if needed + if (encodedBuf.remaining() 
== 0) { + ByteBuffer largerBuf = ByteBuffer.allocate(encodedBuf.capacity() * 2); + encodedBuf.flip(); + largerBuf.put(encodedBuf); + encodedBuf = largerBuf; + } + + // note that we advance i here as we consume chars + int msBits = Character.digit(input.charAt(++i), 16); + int lsBits = Character.digit(input.charAt(++i), 16); + + if (msBits == -1 || lsBits == -1) { + throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">"); + } + + msBits <<= 4; + msBits |= lsBits; + + // msBits can only have 8 bits set, so cast is safe + encodedBuf.put((byte) msBits); + } + + handleEncodedBytes(); + + return outputBuf.toString(); + } + + /** + * Decode any buffered encoded bytes and write them to the output buf. + */ + private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException { + if (encodedBuf.position() == 0) { + // nothing to do + return; + } + + decoder.reset(); + CoderResult coderResult; + + // switch to reading mode + encodedBuf.flip(); + + // loop while we're filling up the decoded char buf, or there's any encoded bytes + // decode() in practice seems to only consume bytes when it can decode an entire char... 
+ do { + decodedCharBuf.clear(); + coderResult = decoder.decode(encodedBuf, decodedCharBuf, false); + throwIfError(coderResult); + appendDecodedChars(); + } while (coderResult == OVERFLOW && encodedBuf.hasRemaining()); + + // final decode with end-of-input flag + decodedCharBuf.clear(); + coderResult = decoder.decode(encodedBuf, decodedCharBuf, true); + throwIfError(coderResult); + + if (encodedBuf.hasRemaining()) { + throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes"); + } + if (coderResult != UNDERFLOW) { + throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult); + } + + appendDecodedChars(); + + // we've finished the input, wrap it up + encodedBuf.clear(); + flush(); + } + + /** + * Must only be called when the input encoded bytes buffer is empty + */ + private void flush() throws MalformedInputException, UnmappableCharacterException { + CoderResult coderResult; + decodedCharBuf.clear(); + + coderResult = decoder.flush(decodedCharBuf); + appendDecodedChars(); + + throwIfError(coderResult); + + if (coderResult != UNDERFLOW) { + throw new IllegalStateException("Decoder flush resulted in " + coderResult); + } + } + + /** + * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding + * CharacterCodingException. 
+ * + * @param coderResult result to check + * @throws MalformedInputException if result represents malformed input + * @throws UnmappableCharacterException if result represents an unmappable character + */ + private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException { + if (coderResult.isMalformed()) { + throw new MalformedInputException(coderResult.length()); + } + if (coderResult.isUnmappable()) { + throw new UnmappableCharacterException(coderResult.length()); + } } + + /** + * Flip the decoded char buf and append it to the string bug + */ + private void appendDecodedChars() { + decodedCharBuf.flip(); + outputBuf.append(decodedCharBuf); + } +} diff --git a/oai-client/src/main/java/org/xbib/oai/client/util/PercentEncoder.java b/oai-client/src/main/java/org/xbib/oai/client/util/PercentEncoder.java new file mode 100755 index 0000000..5ef12b4 --- /dev/null +++ b/oai-client/src/main/java/org/xbib/oai/client/util/PercentEncoder.java @@ -0,0 +1,187 @@ +package org.xbib.oai.client.util; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.MalformedInputException; +import java.nio.charset.UnmappableCharacterException; +import java.util.BitSet; + +import static java.lang.Character.isHighSurrogate; +import static java.lang.Character.isLowSurrogate; + +/** + * Encodes unsafe characters as a sequence of %XX hex-encoded bytes. + * + * This is typically done when encoding components of URLs. See {@link UrlPercentEncoders} for pre-configured + * PercentEncoder instances. 
+ */ +public final class PercentEncoder { + + private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray(); + + private final BitSet safeChars; + private final CharsetEncoder encoder; + /** + * Pre-allocate a string handler to make the common case of encoding to a string faster + */ + private final StringBuilderPercentEncoderOutputHandler stringHandler = new StringBuilderPercentEncoderOutputHandler(); + private final ByteBuffer encodedBytes; + private final CharBuffer unsafeCharsToEncode; + + /** + * @param safeChars the set of chars to NOT encode, stored as a bitset with the int positions corresponding to + * those chars set to true. Treated as read only. + * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder instances + * across threads. + */ + public PercentEncoder(BitSet safeChars, CharsetEncoder charsetEncoder) { + this.safeChars = safeChars; + this.encoder = charsetEncoder; + + // why is this a float? sigh. + int maxBytesPerChar = 1 + (int) encoder.maxBytesPerChar(); + // need to handle surrogate pairs, so need to be able to handle 2 chars worth of stuff at once + encodedBytes = ByteBuffer.allocate(maxBytesPerChar * 2); + unsafeCharsToEncode = CharBuffer.allocate(2); + } + + /** + * Encode the input and pass output chars to a handler. 
+ * + * @param input input string + * @param handler handler to call on each output character + * @throws MalformedInputException if encoder is configured to report errors and malformed input is detected + * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is + * detected + */ + public void encode(CharSequence input, StringBuilderPercentEncoderOutputHandler handler) throws + MalformedInputException, UnmappableCharacterException { + + for (int i = 0; i < input.length(); i++) { + + char c = input.charAt(i); + + if (safeChars.get(c)) { + handler.onOutputChar(c); + continue; + } + + // not a safe char + unsafeCharsToEncode.clear(); + unsafeCharsToEncode.append(c); + if (isHighSurrogate(c)) { + if (input.length() > i + 1) { + // get the low surrogate as well + char lowSurrogate = input.charAt(i + 1); + if (isLowSurrogate(lowSurrogate)) { + unsafeCharsToEncode.append(lowSurrogate); + i++; + } else { + throw new IllegalArgumentException( + "Invalid UTF-16: Char " + (i) + " is a high surrogate (\\u" + Integer + .toHexString(c) + "), but char " + (i + 1) + " is not a low surrogate (\\u" + Integer + .toHexString(lowSurrogate) + ")"); + } + } else { + throw new IllegalArgumentException( + "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer + .toHexString(c) + ")"); + } + } + + flushUnsafeCharBuffer(handler); + } + } + + /** + * Encode the input and return the resulting text as a String. 
+ * + * @param input input string + * @return the input string with every character that's not in safeChars turned into its byte representation via the + * instance's encoder and then percent-encoded + * @throws MalformedInputException if encoder is configured to report errors and malformed input is detected + * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is + * detected + */ + public String encode(CharSequence input) throws MalformedInputException, UnmappableCharacterException { + stringHandler.reset(); + stringHandler.ensureCapacity(input.length()); + encode(input, stringHandler); + return stringHandler.getContents(); + } + + /** + * Encode unsafeCharsToEncode to bytes as per charsetEncoder, then percent-encode those bytes into output. + * + * Side effects: unsafeCharsToEncode will be read from and cleared. encodedBytes will be cleared and written to. + * + */ + private void flushUnsafeCharBuffer(StringBuilderPercentEncoderOutputHandler handler) throws MalformedInputException, + UnmappableCharacterException { + // need to read from the char buffer, which was most recently written to + unsafeCharsToEncode.flip(); + + encodedBytes.clear(); + + encoder.reset(); + CoderResult result = encoder.encode(unsafeCharsToEncode, encodedBytes, true); + checkResult(result); + result = encoder.flush(encodedBytes); + checkResult(result); + + // read contents of bytebuffer + encodedBytes.flip(); + + while (encodedBytes.hasRemaining()) { + byte b = encodedBytes.get(); + handler.onOutputChar('%'); + handler.onOutputChar(HEX_CODE[b >> 4 & 0xF]); + handler.onOutputChar(HEX_CODE[b & 0xF]); + } + } + + /** + * @param result result to check + * @throws IllegalStateException if result is overflow + * @throws MalformedInputException if result represents malformed input + * @throws UnmappableCharacterException if result represents an unmappable character + */ + private static void checkResult(CoderResult result) throws 
MalformedInputException, UnmappableCharacterException { + if (result.isOverflow()) { + throw new IllegalStateException("Byte buffer overflow; this should not happen."); + } + if (result.isMalformed()) { + throw new MalformedInputException(result.length()); + } + if (result.isUnmappable()) { + throw new UnmappableCharacterException(result.length()); + } + } + + private class StringBuilderPercentEncoderOutputHandler { + + private final StringBuilder stringBuilder; + + StringBuilderPercentEncoderOutputHandler() { + stringBuilder = new StringBuilder(); + } + + String getContents() { + return stringBuilder.toString(); + } + + void reset() { + stringBuilder.setLength(0); + } + + void ensureCapacity(int length) { + stringBuilder.ensureCapacity(length); + } + + void onOutputChar(char c) { + stringBuilder.append(c); + } + } +} diff --git a/oai-client/src/main/java/org/xbib/oai/client/util/UrlBuilder.java b/oai-client/src/main/java/org/xbib/oai/client/util/UrlBuilder.java new file mode 100755 index 0000000..5feb051 --- /dev/null +++ b/oai-client/src/main/java/org/xbib/oai/client/util/UrlBuilder.java @@ -0,0 +1,503 @@ +package org.xbib.oai.client.util; + +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; + +import static org.xbib.oai.client.util.UrlPercentEncoders.getFragmentEncoder; +import static org.xbib.oai.client.util.UrlPercentEncoders.getMatrixEncoder; +import static org.xbib.oai.client.util.UrlPercentEncoders.getPathEncoder; +import static org.xbib.oai.client.util.UrlPercentEncoders.getQueryParamEncoder; +import static org.xbib.oai.client.util.UrlPercentEncoders.getRegNameEncoder; +import static org.xbib.oai.client.util.UrlPercentEncoders.getUnstructuredQueryEncoder; + +/** + * Builder for urls with url-encoding 
applied to path, query param, etc. + * + * Escaping rules are from RFC 3986, RFC 1738 and the HTML 4 spec + * This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make + * HTTP-useful URLs. + */ +public final class UrlBuilder { + + /** + * IPv6 address, cribbed from StackOverflow + */ + private static final Pattern IPV6_PATTERN = Pattern + .compile( + "\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z"); + + /** + * IPv4 dotted quad + */ + private static final Pattern IPV4_PATTERN = Pattern + .compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z"); + + private final String scheme; + + private final String host; + + private final Integer port; + + private final List> queryParams = new ArrayList<>(); + + /** + * If this is non-null, queryParams must be empty, and vice versa. + */ + private String unstructuredQuery; + + private final List pathSegments = new ArrayList<>(); + + private final PercentEncoder pathEncoder = getPathEncoder(); + private final PercentEncoder regNameEncoder = getRegNameEncoder(); + private final PercentEncoder matrixEncoder = getMatrixEncoder(); + private final PercentEncoder queryParamEncoder = getQueryParamEncoder(); + private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder(); + private final PercentEncoder fragmentEncoder = getFragmentEncoder(); + + private String fragment; + + private boolean forceTrailingSlash = false; + + /** + * Create a URL with UTF-8 encoding. + * + * @param scheme scheme (e.g. http) + * @param host host (e.g. foo.com or 1.2.3.4 or [::1]) + * @param port null or a positive integer + */ + private UrlBuilder(String scheme, String host, Integer port) { + this.host = host; + this.scheme = scheme; + this.port = port; + } + + /** + * Create a URL with an null port and UTF-8 encoding. + * + * @param scheme scheme (e.g. 
http) + * @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal + * ([::1]), excluding IPvFuture since no one uses that in practice + * @return a url builder + * @see UrlBuilder#forHost(String scheme, String host, int port) + */ + public static UrlBuilder forHost(String scheme, String host) { + return new UrlBuilder(scheme, host, null); + } + + /** + * @param scheme scheme (e.g. http) + * @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal + * ([::1]), excluding IPvFuture since no one uses that in practice + * @param port port + * @return a url builder + */ + public static UrlBuilder forHost(String scheme, String host, int port) { + return new UrlBuilder(scheme, host, port); + } + + public static UrlBuilder fromUrl(String urlSpec) throws CharacterCodingException, MalformedURLException { + return fromUrl(new URL(urlSpec)); + } + + /** + * Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the + * query string apply. + * + * @param url url to initialize builder with + * @return a UrlBuilder containing the host, path, etc. from the url + * @throws CharacterCodingException if char decoding fails + * @see UrlBuilder#fromUrl(URL, CharsetDecoder) + */ + public static UrlBuilder fromUrl(URL url) throws CharacterCodingException { + return fromUrl(url, StandardCharsets.UTF_8.newDecoder()); + } + + /** + * Create a UrlBuilder initialized with the contents of a {@link URL}. + * + * The query string will be parsed into HTML4 query params if it can be separated into a + * &-separated sequence of key=value pairs. The sequence of query params can then be + * appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is + * only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that + * is in some other form. 
To represent this case, if the aforementioned param-parsing attempt fails, the query + * string will be treated as just a monolithic, unstructured string. In this case, calls to {@link + * UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls + * to {@link UrlBuilder#unstructuredQuery(String)}, which replaces the entire query string, are allowed. + * + * @param url url to initialize builder with + * @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8) + * @return a UrlBuilder containing the host, path, etc. from the url + * @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to + * report errors + * @see UrlBuilder#fromUrl(URL, CharsetDecoder) + */ + public static UrlBuilder fromUrl(URL url, CharsetDecoder charsetDecoder) throws + CharacterCodingException { + + PercentDecoder decoder = new PercentDecoder(charsetDecoder); + // reg names must be encoded UTF-8 + PercentDecoder regNameDecoder; + if (charsetDecoder.charset().equals(StandardCharsets.UTF_8)) { + regNameDecoder = decoder; + } else { + regNameDecoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder()); + } + + Integer port = url.getPort(); + if (port == -1) { + port = null; + } + + UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port); + + buildFromPath(builder, decoder, url); + + buildFromQuery(builder, decoder, url); + + if (url.getRef() != null) { + builder.fragment(decoder.decode(url.getRef())); + } + + return builder; + } + + /** + * Add a path segment. + * + * @param segment a path segment + * @return this + */ + public UrlBuilder pathSegment(String segment) { + pathSegments.add(new PathSegment(segment)); + return this; + } + + /** + * Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}. 
+ * + * @param segments path segments + * @return this + */ + public UrlBuilder pathSegments(String... segments) { + for (String segment : segments) { + pathSegment(segment); + } + + return this; + } + + /** + * Add an HTML query parameter. Query parameters will be encoded in the order added. + * + * Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by + * HTML 4. + * + * If you use this method to build a query string, or created this builder from a url with a query string that can + * successfully be parsed into query param pairs, you cannot subsequently use {@link + * UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}. + * + * @param name param name + * @param value param value + * @return this + */ + public UrlBuilder queryParam(String name, String value) { + if (unstructuredQuery != null) { + throw new IllegalStateException( + "Cannot call queryParam() when this already has an unstructured query specified"); + } + + queryParams.add(Pair.of(name, value)); + return this; + } + + /** + * Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that + * is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite + * that query. + * + * If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you + * cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL, + * CharsetDecoder)}. + * + * @param query Complete URI query, as specified by RFC 3986. 
+ * @return this + */ + public UrlBuilder unstructuredQuery(String query) { + if (!queryParams.isEmpty()) { + throw new IllegalStateException( + "Cannot call unstructuredQuery() when this already has queryParam pairs specified"); + } + + unstructuredQuery = query; + + return this; + } + + /** + * Clear the unstructured query and any query params. + * + * Since the query / query param situation is a little complicated, this method will let you remove all query + * information and start again from scratch. This may be useful when taking an existing url, parsing it into a + * builder, and then re-doing its query params, for instance. + * + * @return this + */ + public UrlBuilder clearQuery() { + queryParams.clear(); + unstructuredQuery = null; + + return this; + } + + /** + * Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the + * root. Matrix params will be encoded in the order added. + * + * @param name param name + * @param value param value + * @return this + */ + public UrlBuilder matrixParam(String name, String value) { + if (pathSegments.isEmpty()) { + // create an empty path segment to represent a matrix param applied to the root + pathSegment(""); + } + + PathSegment seg = pathSegments.get(pathSegments.size() - 1); + seg.matrixParams.add(Pair.of(name, value)); + return this; + } + + /** + * Set the fragment. + * + * @param fragment fragment string + * @return this + */ + public UrlBuilder fragment(String fragment) { + this.fragment = fragment; + return this; + } + + /** + * Force the generated URL to have a trailing slash at the end of the path. + * + * @return this + */ + public UrlBuilder forceTrailingSlash() { + forceTrailingSlash = true; + return this; + } + + public URL build() throws CharacterCodingException, MalformedURLException { + return new URL(toUrlString()); + } + + /** + * Encode the current builder state into a URL string. 
+ * + * @return a well-formed URL string + * @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors + */ + public String toUrlString() throws CharacterCodingException { + StringBuilder buf = new StringBuilder(); + + buf.append(scheme); + buf.append("://"); + + buf.append(encodeHost(host)); + if (port != null) { + buf.append(':'); + buf.append(port); + } + + for (PathSegment pathSegment : pathSegments) { + buf.append('/'); + buf.append(pathEncoder.encode(pathSegment.segment)); + + for (Pair matrixParam : pathSegment.matrixParams) { + buf.append(';'); + buf.append(matrixEncoder.encode(matrixParam.getKey())); + buf.append('='); + buf.append(matrixEncoder.encode(matrixParam.getValue())); + } + } + + if (forceTrailingSlash) { + buf.append('/'); + } + + if (!queryParams.isEmpty()) { + buf.append("?"); + Iterator> qpIter = queryParams.iterator(); + while (qpIter.hasNext()) { + Pair queryParam = qpIter.next(); + buf.append(queryParamEncoder.encode(queryParam.getKey())); + buf.append('='); + buf.append(queryParamEncoder.encode(queryParam.getValue())); + if (qpIter.hasNext()) { + buf.append('&'); + } + } + } else if (unstructuredQuery != null) { + buf.append("?"); + buf.append(unstructuredQueryEncoder.encode(unstructuredQuery)); + } + + if (fragment != null) { + buf.append('#'); + buf.append(fragmentEncoder.encode(fragment)); + } + + return buf.toString(); + } + + /** + * Populate a url builder based on the query of a url + * + * @param builder builder + * @param decoder decoder + * @param url url + * @throws CharacterCodingException + */ + private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws + CharacterCodingException { + if (url.getQuery() != null) { + String q = url.getQuery(); + + // try to parse into &-separated key=value pairs + List> pairs = new ArrayList<>(); + boolean parseOk = true; + + for (String queryChunk : q.split("&")) { + String[] queryParamChunks = 
queryChunk.split("="); + + if (queryParamChunks.length != 2) { + parseOk = false; + break; + } + + pairs.add(Pair.of(decoder.decode(queryParamChunks[0]), + decoder.decode(queryParamChunks[1]))); + } + + if (parseOk) { + for (Pair pair : pairs) { + builder.queryParam(pair.getKey(), pair.getValue()); + } + } else { + builder.unstructuredQuery(decoder.decode(q)); + } + } + } + + /** + * Populate the path segments of a url builder from a url + * + * @param builder builder + * @param decoder decoder + * @param url url + * @throws CharacterCodingException + */ + private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws + CharacterCodingException { + for (String pathChunk : url.getPath().split("/")) { + if (pathChunk.equals("")) { + continue; + } + + if (pathChunk.charAt(0) == ';') { + builder.pathSegment(""); + // empty path segment, but matrix params + for (String matrixChunk : pathChunk.substring(1).split(";")) { + buildFromMatrixParamChunk(decoder, builder, matrixChunk); + } + + continue; + } + + // otherwise, path chunk is non empty and does not start with a ';' + + String[] matrixChunks = pathChunk.split(";"); + + // first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will + // not be included in the final url. 
+ builder.pathSegment(decoder.decode(matrixChunks[0])); + + // if there are any other chunks, they're matrix param pairs + for (int i = 1; i < matrixChunks.length; i++) { + buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]); + } + } + } + + private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws + CharacterCodingException { + String[] mtxPair = pathMatrixChunk.split("="); + if (mtxPair.length != 2) { + throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">"); + } + + String mtxName = mtxPair[0]; + String mtxVal = mtxPair[1]; + ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal)); + } + + /** + * @param host original host string + * @return host encoded as in RFC 3986 section 3.2.2 + */ + private String encodeHost(String host) throws CharacterCodingException { + // matching order: IP-literal, IPv4, reg-name + if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) { + return host; + } + + // it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL) + return regNameEncoder.encode(host); + } + + /** + * Bundle of a path segment name and any associated matrix params. 
+ */ + private static class PathSegment { + private final String segment; + private final List> matrixParams = new ArrayList<>(); + + PathSegment(String segment) { + this.segment = segment; + } + } + + static class Pair { + + K key; + + V value; + + Pair(K key, V value) { + this.key = key; + this.value = value; + } + + static Pair of(K key, V value) { + return new Pair<>(key, value); + } + + K getKey() { + return key; + } + + V getValue() { + return value; + } + + } +} diff --git a/oai-client/src/main/java/org/xbib/oai/client/util/UrlPercentEncoders.java b/oai-client/src/main/java/org/xbib/oai/client/util/UrlPercentEncoders.java new file mode 100755 index 0000000..222fd98 --- /dev/null +++ b/oai-client/src/main/java/org/xbib/oai/client/util/UrlPercentEncoders.java @@ -0,0 +1,164 @@ +package org.xbib.oai.client.util; + +import java.nio.charset.StandardCharsets; +import java.util.BitSet; +import static java.nio.charset.CodingErrorAction.REPLACE; + +/** + * See RFC 3986, RFC 1738 and http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding. + */ +public final class UrlPercentEncoders { + + /** + * an encoder for RFC 3986 reg-names + */ + + private static final BitSet REG_NAME_BIT_SET = new BitSet(); + + private static final BitSet PATH_BIT_SET = new BitSet(); + private static final BitSet MATRIX_BIT_SET = new BitSet(); + private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet(); + private static final BitSet QUERY_PARAM_BIT_SET = new BitSet(); + private static final BitSet FRAGMENT_BIT_SET = new BitSet(); + + static { + // RFC 3986 'reg-name'. This is not very aggressive... it's quite possible to have DNS-illegal names out of this. + // Regardless, it will at least be URI-compliant even if it's not HTTP URL-compliant. + addUnreserved(REG_NAME_BIT_SET); + addSubdelims(REG_NAME_BIT_SET); + + // Represents RFC 3986 'pchar'. Remove delimiter that starts matrix section. 
+ addPChar(PATH_BIT_SET); + PATH_BIT_SET.clear((int) ';'); + + // Remove delims for HTTP matrix params as per RFC 1738 S3.3. The other reserved chars ('/' and '?') are already excluded. + addPChar(MATRIX_BIT_SET); + MATRIX_BIT_SET.clear((int) ';'); + MATRIX_BIT_SET.clear((int) '='); + + /* + * At this point it represents RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also + * specifies that "+" can mean space in a query, so we will make sure to say that '+' is not safe to leave as-is + */ + addQuery(UNSTRUCTURED_QUERY_BIT_SET); + UNSTRUCTURED_QUERY_BIT_SET.clear((int) '+'); + + /* + * Create more stringent requirements for HTML4 queries: remove delimiters for HTML query params so that key=value + * pairs can be used. + */ + QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET); + QUERY_PARAM_BIT_SET.clear((int) '='); + QUERY_PARAM_BIT_SET.clear((int) '&'); + + addFragment(FRAGMENT_BIT_SET); + } + + public static PercentEncoder getRegNameEncoder() { + return new PercentEncoder(REG_NAME_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getPathEncoder() { + return new PercentEncoder(PATH_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getMatrixEncoder() { + return new PercentEncoder(MATRIX_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getUnstructuredQueryEncoder() { + return new PercentEncoder(UNSTRUCTURED_QUERY_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getQueryParamEncoder() { + return new PercentEncoder(QUERY_PARAM_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder 
getFragmentEncoder() { + return new PercentEncoder(FRAGMENT_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + private UrlPercentEncoders() { + } + + /** + * Add code points for 'fragment' chars + * + * @param fragmentBitSet bit set + */ + private static void addFragment(BitSet fragmentBitSet) { + addPChar(fragmentBitSet); + fragmentBitSet.set((int) '/'); + fragmentBitSet.set((int) '?'); + } + + /** + * Add code points for 'query' chars + * + * @param queryBitSet bit set + */ + private static void addQuery(BitSet queryBitSet) { + addPChar(queryBitSet); + queryBitSet.set((int) '/'); + queryBitSet.set((int) '?'); + } + + /** + * Add code points for 'pchar' chars. + * + * @param bs bitset + */ + private static void addPChar(BitSet bs) { + addUnreserved(bs); + addSubdelims(bs); + bs.set((int) ':'); + bs.set((int) '@'); + } + + /** + * Add codepoints for 'unreserved' chars + * + * @param bs bitset to add codepoints to + */ + private static void addUnreserved(BitSet bs) { + + for (int i = 'a'; i <= 'z'; i++) { + bs.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + bs.set(i); + } + for (int i = '0'; i <= '9'; i++) { + bs.set(i); + } + bs.set((int) '-'); + bs.set((int) '.'); + bs.set((int) '_'); + bs.set((int) '~'); + } + + /** + * Add codepoints for 'sub-delims' chars + * + * @param bs bitset to add codepoints to + */ + private static void addSubdelims(BitSet bs) { + bs.set((int) '!'); + bs.set((int) '$'); + bs.set((int) '&'); + bs.set((int) '\''); + bs.set((int) '('); + bs.set((int) ')'); + bs.set((int) '*'); + bs.set((int) '+'); + bs.set((int) ','); + bs.set((int) ';'); + bs.set((int) '='); + } +} diff --git a/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java b/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java index b8c3ca3..933515a 100644 --- a/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java +++ 
b/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java @@ -23,13 +23,15 @@ class DOAJClientTest { IdentifyResponse identifyResponse = oaiClient.identify(); String granularity = identifyResponse.getGranularity(); logger.log(Level.INFO, "granularity = " + granularity); - DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ? - DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null; + // override granularity because of "bad arguments" error. Seems DOAJ is unable to manage its own declared granularity. + DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")); Handler handler = new Handler(); SplitWriter splitWriter = new SplitWriter("build/doaj-%d.xml", -1, 8192, false); oaiClient.setSplitWriter(splitWriter); + Instant to = Instant.now(); + Instant from = to.atZone(ZoneId.systemDefault()).minusMonths(1).toInstant(); oaiClient.listRecords("oai_dc", null, - dateTimeFormatter,Instant.parse("2021-05-01T00:00:00Z"), Instant.parse("2021-06-01T00:00:00Z"), null, + dateTimeFormatter, from, to, null, handler, null); logger.log(Level.INFO, "count = " + handler.count()); assertTrue(handler.count() > 0); diff --git a/oai-client/src/test/java/org/xbib/oai/client/util/PercentEncoderTest.java b/oai-client/src/test/java/org/xbib/oai/client/util/PercentEncoderTest.java new file mode 100755 index 0000000..9a61649 --- /dev/null +++ b/oai-client/src/test/java/org/xbib/oai/client/util/PercentEncoderTest.java @@ -0,0 +1,84 @@ +package org.xbib.oai.client.util; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.nio.charset.CharacterCodingException; +import java.nio.charset.MalformedInputException; +import java.nio.charset.StandardCharsets; +import java.nio.charset.UnmappableCharacterException; +import java.util.BitSet; + +import static java.nio.charset.CodingErrorAction.REPLACE; +import static org.junit.jupiter.api.Assertions.assertEquals; + 
+public final class PercentEncoderTest {
+
+    private static PercentEncoder alnum;
+    private static PercentEncoder alnum16;
+
+    @BeforeAll
+    public static void setUp() {
+        BitSet bs = new BitSet();
+        for (int i = 'a'; i <= 'z'; i++) {
+            bs.set(i);
+        }
+        for (int i = 'A'; i <= 'Z'; i++) {
+            bs.set(i);
+        }
+        for (int i = '0'; i <= '9'; i++) {
+            bs.set(i);
+        }
+
+        alnum = new PercentEncoder(bs, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
+                .onUnmappableCharacter(REPLACE));
+        alnum16 = new PercentEncoder(bs, StandardCharsets.UTF_16BE.newEncoder().onMalformedInput(REPLACE)
+                .onUnmappableCharacter(REPLACE));
+    }
+
+    @Test
+    public void testDoesntEncodeSafe() throws CharacterCodingException {
+        BitSet set = new BitSet();
+        for (int i = 'a'; i <= 'z'; i++) {
+            set.set(i);
+        }
+
+        PercentEncoder pe = new PercentEncoder(set, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
+                .onUnmappableCharacter(REPLACE));
+        assertEquals("abcd%41%42%43%44", pe.encode("abcdABCD"));
+    }
+
+    @Test
+    public void testEncodeInBetweenSafe() throws MalformedInputException, UnmappableCharacterException {
+        assertEquals("abc%20123", alnum.encode("abc 123"));
+    }
+
+    @Test
+    public void testSafeInBetweenEncoded() throws MalformedInputException, UnmappableCharacterException {
+        assertEquals("%20abc%20", alnum.encode(" abc "));
+    }
+
+    @Test
+    public void testEncodeUtf8() throws CharacterCodingException {
+        // 1 UTF-16 char (unicode snowman)
+        assertEquals("snowman%E2%98%83", alnum.encode("snowman\u2603"));
+    }
+
+    @Test
+    public void testEncodeUtf8SurrogatePair() throws CharacterCodingException {
+        // musical G clef: 1d11e, has to be represented in surrogate pair form
+        assertEquals("clef%F0%9D%84%9E", alnum.encode("clef\ud834\udd1e"));
+    }
+
+    @Test
+    public void testEncodeUtf16() throws CharacterCodingException {
+        // 1 UTF-16 char (unicode snowman)
+        assertEquals("snowman%26%03", alnum16.encode("snowman\u2603"));
+    }
+
+    @Test
+    public void testUrlEncodedUtf16SurrogatePair() throws CharacterCodingException {
+        // musical G clef: 1d11e, has to be represented in surrogate pair form
+        assertEquals("clef%D8%34%DD%1E", alnum16.encode("clef\ud834\udd1e"));
+    }
+}
diff --git a/oai-client/src/test/java/org/xbib/oai/client/util/UrlBuilderTest.java b/oai-client/src/test/java/org/xbib/oai/client/util/UrlBuilderTest.java
new file mode 100755
index 0000000..0d3d30a
--- /dev/null
+++ b/oai-client/src/test/java/org/xbib/oai/client/util/UrlBuilderTest.java
@@ -0,0 +1,435 @@
+package org.xbib.oai.client.util;
+
+import org.junit.jupiter.api.Test;
+
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.CharacterCodingException;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Tests for {@link UrlBuilder}: assembling URL strings from individual parts and
+ * rebuilding URLs parsed with {@code UrlBuilder.fromUrl}.
+ */
+public final class UrlBuilderTest {
+
+    @Test
+    public void testNoUrlParts() throws Exception {
+        assertUrlEquals("http://foo.com", UrlBuilder.forHost("http", "foo.com").toUrlString());
+    }
+
+    @Test
+    public void testWithPort() throws Exception {
+        assertUrlEquals("http://foo.com:33", UrlBuilder.forHost("http", "foo.com", 33).toUrlString());
+    }
+
+    @Test
+    public void testSimplePath() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.pathSegment("seg1").pathSegment("seg2");
+        assertUrlEquals("http://foo.com/seg1/seg2", ub.toUrlString());
+    }
+
+    @Test
+    public void testPathWithReserved() throws Exception {
+        // RFC 1738 S3.3
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.pathSegment("seg/;?ment").pathSegment("seg=&2");
+        assertUrlEquals("http://foo.com/seg%2F%3B%3Fment/seg=&2", ub.toUrlString());
+    }
+
+    @Test
+    public void testPathSegments() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.pathSegments("seg1", "seg2", "seg3");
+        assertUrlEquals("http://foo.com/seg1/seg2/seg3", ub.toUrlString());
+    }
+
+    @Test
+    public void testMatrixWithoutPathHasLeadingSlash() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.matrixParam("foo", "bar");
+        assertUrlEquals("http://foo.com/;foo=bar", ub.toUrlString());
+    }
+
+    @Test
+    public void testMatrixWithReserved() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com")
+                .pathSegment("foo")
+                .matrixParam("foo", "bar")
+                .matrixParam("res;=?#/erved", "value")
+                .pathSegment("baz");
+        assertUrlEquals("http://foo.com/foo;foo=bar;res%3B%3D%3F%23%2Ferved=value/baz", ub.toUrlString());
+    }
+
+    @Test
+    public void testUrlEncodedPathSegmentUtf8() throws Exception {
+        // 1 UTF-16 char
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.pathSegment("snowman").pathSegment("\u2603");
+        assertUrlEquals("http://foo.com/snowman/%E2%98%83", ub.toUrlString());
+    }
+
+    @Test
+    public void testUrlEncodedPathSegmentUtf8SurrogatePair() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        // musical G clef: 1d11e, has to be represented in surrogate pair form
+        ub.pathSegment("clef").pathSegment("\ud834\udd1e");
+        assertUrlEquals("http://foo.com/clef/%F0%9D%84%9E", ub.toUrlString());
+    }
+
+    @Test
+    public void testQueryParamNoPath() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.queryParam("foo", "bar");
+        String s = ub.toUrlString();
+        assertUrlEquals("http://foo.com?foo=bar", s);
+    }
+
+    @Test
+    public void testQueryParamsDuplicated() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.queryParam("foo", "bar");
+        ub.queryParam("foo", "bar2");
+        ub.queryParam("baz", "quux");
+        ub.queryParam("baz", "quux2");
+        assertUrlEquals("http://foo.com?foo=bar&foo=bar2&baz=quux&baz=quux2", ub.toUrlString());
+    }
+
+    @Test
+    public void testEncodeQueryParams() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.queryParam("foo", "bar&=#baz");
+        ub.queryParam("foo", "bar?/2");
+        assertUrlEquals("http://foo.com?foo=bar%26%3D%23baz&foo=bar?/2", ub.toUrlString());
+    }
+
+    @Test
+    public void testEncodeQueryParamWithSpaceAndPlus() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.queryParam("foo", "spa ce");
+        ub.queryParam("fo+o", "plus+");
+        assertUrlEquals("http://foo.com?foo=spa%20ce&fo%2Bo=plus%2B", ub.toUrlString());
+    }
+
+    @Test
+    public void testPlusInVariousParts() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+
+        ub.pathSegment("has+plus")
+                .matrixParam("plusMtx", "pl+us")
+                .queryParam("plusQp", "pl+us")
+                .fragment("plus+frag");
+
+        assertUrlEquals("http://foo.com/has+plus;plusMtx=pl+us?plusQp=pl%2Bus#plus+frag", ub.toUrlString());
+    }
+
+    @Test
+    public void testFragment() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
+        ub.queryParam("foo", "bar");
+        ub.fragment("#frag/?");
+        assertUrlEquals("http://foo.com?foo=bar#%23frag/?", ub.toUrlString());
+    }
+
+    @Test
+    public void testAllParts() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("https", "foo.bar.com", 3333);
+        ub.pathSegment("foo");
+        ub.pathSegment("bar");
+        ub.matrixParam("mtx1", "val1");
+        ub.matrixParam("mtx2", "val2");
+        ub.queryParam("q1", "v1");
+        ub.queryParam("q2", "v2");
+        ub.fragment("zomg it's a fragment");
+
+        assertEquals("https://foo.bar.com:3333/foo/bar;mtx1=val1;mtx2=val2?q1=v1&q2=v2#zomg%20it's%20a%20fragment",
+                ub.toUrlString());
+    }
+
+    @Test
+    public void testIPv4Literal() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "127.0.0.1");
+        assertUrlEquals("http://127.0.0.1", ub.toUrlString());
+    }
+
+    @Test
+    public void testBadIPv4LiteralDoesntChoke() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "300.100.50.1");
+        assertUrlEquals("http://300.100.50.1", ub.toUrlString());
+    }
+
+    @Test
+    public void testIPv6LiteralLocalhost() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "[::1]");
+        assertUrlEquals("http://[::1]", ub.toUrlString());
+    }
+
+    @Test
+    public void testIPv6Literal() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "[2001:db8:85a3::8a2e:370:7334]");
+        assertUrlEquals("http://[2001:db8:85a3::8a2e:370:7334]", ub.toUrlString());
+    }
+
+    @Test
+    public void testEncodedRegNameSingleByte() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "host?name;");
+        assertUrlEquals("http://host%3Fname;", ub.toUrlString());
+    }
+
+    @Test
+    public void testEncodedRegNameMultiByte() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "snow\u2603man");
+        assertUrlEquals("http://snow%E2%98%83man", ub.toUrlString());
+    }
+
+    @Test
+    public void testForceTrailingSlash() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c");
+
+        assertUrlEquals("https://foo.com/a/b/c/", ub.toUrlString());
+    }
+
+    @Test
+    public void testForceTrailingSlashWithQueryParams() throws Exception {
+        UrlBuilder ub =
+                UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c").queryParam("foo", "bar");
+
+        assertUrlEquals("https://foo.com/a/b/c/?foo=bar", ub.toUrlString());
+    }
+
+    @Test
+    public void testForceTrailingSlashNoPathSegmentsWithMatrixParams() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().matrixParam("m1", "v1");
+
+        assertUrlEquals("https://foo.com/;m1=v1/", ub.toUrlString());
+    }
+
+    @Test
+    public void testIntermingledMatrixParamsAndPathSegments() throws Exception {
+
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com")
+                .pathSegments("seg1", "seg2")
+                .matrixParam("m1", "v1")
+                .pathSegment("seg3")
+                .matrixParam("m2", "v2");
+
+        assertUrlEquals("http://foo.com/seg1/seg2;m1=v1/seg3;m2=v2", ub.toUrlString());
+    }
+
+    @Test
+    public void testFromUrlWithEverything() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        String orig =
+                "https://foo.bar.com:3333/foo/ba%20r;mtx1=val1;mtx2=val%202/seg%203;m2=v2?q1=v1&q2=v%202#zomg%20it's%20a%20fragment";
+        assertUrlBuilderRoundtrip(orig);
+    }
+
+    @Test
+    public void testFromUrlWithEmptyPath() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com");
+    }
+
+    @Test
+    public void testFromUrlWithEmptyPathAndSlash() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/", "http://foo.com");
+    }
+
+    @Test
+    public void testFromUrlWithPort() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com:1234");
+    }
+
+    @Test
+    public void testFromUrlWithEmptyPathSegment() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo//", "http://foo.com/foo");
+    }
+
+    @Test
+    public void testFromUrlWithEncodedHost() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://f%20oo.com/bar");
+    }
+
+    @Test
+    public void testFromUrlWithEncodedPathSegment() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo/b%20ar");
+    }
+
+    @Test
+    public void testFromUrlWithEncodedMatrixParam() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo;m1=v1;m%202=v%202");
+    }
+
+    @Test
+    public void testFromUrlWithEncodedQueryParam() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo?q%201=v%202&q2=v2");
+    }
+
+    @Test
+    public void testFromUrlWithEncodedQueryParamDelimiter() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo?q1=%3Dv1&%26q2=v2");
+    }
+
+    @Test
+    public void testFromUrlWithEncodedFragment() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo#b%20ar");
+    }
+
+    @Test
+    public void testFromUrlWithMalformedMatrixPair() throws MalformedURLException, CharacterCodingException {
+        try {
+            UrlBuilder.fromUrl("http://foo.com/foo;m1=v1=v2");
+            fail();
+        } catch (IllegalArgumentException e) {
+            assertEquals("Malformed matrix param: ", e.getMessage());
+        }
+    }
+
+    @Test
+    public void testFromUrlWithEmptyPathSegmentWithMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo/;m1=v1");
+    }
+
+    @Test
+    public void testFromUrlWithEmptyPathWithMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/;m1=v1");
+    }
+
+    @Test
+    public void testFromUrlWithEmptyPathWithMultipleMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/;m1=v1;m2=v2");
+    }
+
+    @Test
+    public void testFromUrlWithPathSegmentEndingWithSemicolon() throws URISyntaxException, CharacterCodingException, MalformedURLException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo;", "http://foo.com/foo");
+    }
+
+    @Test
+    public void testPercentDecodeInvalidPair() throws MalformedURLException, CharacterCodingException {
+        try {
+            UrlBuilder.fromUrl("http://foo.com/fo%2o");
+            fail();
+        } catch (IllegalArgumentException e) {
+            assertEquals("Invalid %-tuple <%2o>", e.getMessage());
+        }
+    }
+
+    @Test
+    public void testFromUrlMalformedQueryParamMultiValues() throws MalformedURLException, CharacterCodingException, URISyntaxException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1=v2");
+    }
+
+    @Test
+    public void testFromUrlMalformedQueryParamNoValue() throws MalformedURLException, CharacterCodingException, URISyntaxException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1&q2");
+    }
+
+    @Test
+    public void testFromUrlUnstructuredQueryWithEscapedChars() throws MalformedURLException, CharacterCodingException, URISyntaxException {
+        assertUrlBuilderRoundtrip("http://foo.com/foo?query==&%23");
+    }
+
+    @Test
+    public void testCantUseQueryParamAfterQuery() {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q");
+
+        try {
+            ub.queryParam("foo", "bar");
+            fail();
+        } catch (IllegalStateException e) {
+            assertEquals("Cannot call queryParam() when this already has an unstructured query specified",
+                    e.getMessage());
+        }
+    }
+
+    @Test
+    public void testCantUseQueryAfterQueryParam() {
+        UrlBuilder ub = UrlBuilder.forHost("http", "foo.com").queryParam("foo", "bar");
+
+        try {
+            ub.unstructuredQuery("q");
+
+            fail();
+        } catch (IllegalStateException e) {
+            assertEquals("Cannot call unstructuredQuery() when this already has queryParam pairs specified",
+                    e.getMessage());
+        }
+    }
+
+    @Test
+    public void testUnstructuredQueryWithNoSpecialChars() throws Exception {
+        assertUrlEquals("http://foo.com?q", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q").toUrlString());
+    }
+
+    @Test
+    public void testUnstructuredQueryWithOkSpecialChars() throws Exception {
+        assertUrlEquals("http://foo.com?q?/&=", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q?/&=").toUrlString());
+    }
+
+    @Test
+    public void testUnstructuredQueryWithEscapedSpecialChars() throws Exception {
+        assertUrlEquals("http://foo.com?q%23%2B", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q#+").toUrlString());
+    }
+
+    @Test
+    public void testClearQueryRemovesQueryParam() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "host")
+                .queryParam("foo", "bar")
+                .clearQuery();
+        assertUrlEquals("http://host", ub.toUrlString());
+    }
+
+    @Test
+    public void testClearQueryRemovesUnstructuredQuery() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "host")
+                .unstructuredQuery("foobar")
+                .clearQuery();
+        assertUrlEquals("http://host", ub.toUrlString());
+    }
+
+    @Test
+    public void testClearQueryAfterQueryParamAllowsQuery() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "host")
+                .queryParam("foo", "bar")
+                .clearQuery()
+                .unstructuredQuery("foobar");
+        assertUrlEquals("http://host?foobar", ub.toUrlString());
+    }
+
+    @Test
+    public void testClearQueryAfterQueryAllowsQueryParam() throws Exception {
+        UrlBuilder ub = UrlBuilder.forHost("http", "host")
+                .unstructuredQuery("foobar")
+                .clearQuery()
+                .queryParam("foo", "bar");
+        assertUrlEquals("http://host?foo=bar", ub.toUrlString());
+    }
+
+    private void assertUrlBuilderRoundtrip(String url) throws MalformedURLException, CharacterCodingException, URISyntaxException {
+        assertUrlBuilderRoundtrip(url, url);
+    }
+
+    /**
+     * Builds a {@link UrlBuilder} from {@code origUrl} and asserts the URL string it produces.
+     *
+     * @param origUrl  the url that will be used to create a URL
+     * @param finalUrl the URL string it should end up as
+     */
+    private void assertUrlBuilderRoundtrip(String origUrl, String finalUrl) throws MalformedURLException, CharacterCodingException, URISyntaxException {
+        assertUrlEquals(finalUrl, UrlBuilder.fromUrl(new URL(origUrl)).toUrlString());
+    }
+
+    /**
+     * Asserts that {@code actual} equals {@code expected} and that {@link URI} and
+     * {@link URL} both parse it and render it back unchanged.
+     */
+    private static void assertUrlEquals(String expected, String actual) throws URISyntaxException, MalformedURLException {
+        assertEquals(expected, actual);
+        assertEquals(expected, new URI(actual).toString());
+        assertEquals(expected, new URL(actual).toString());
+    }
+}
diff --git a/oai-common/build.gradle b/oai-common/build.gradle
index ef91f94..733943d 100644
--- a/oai-common/build.gradle
+++ b/oai-common/build.gradle
@@ -1,6 +1,6 @@
 dependencies {
-    api "org.xbib:content-core:${project.property('xbib-content.version')}"
-    api "org.xbib:content-rdf:${project.property('xbib-content.version')}"
-    api "org.xbib:content-resource:${project.property('xbib-content.version')}"
-    api "org.xbib:content-xml:${project.property('xbib-content.version')}"
+    api libs.content.core
+    api libs.content.rdf
+    api libs.content.resource
+    api libs.content.xml
 }
diff --git a/settings.gradle b/settings.gradle
index a1d9ded..aa5ef68 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -1,3 +1,24 @@
+dependencyResolutionManagement {
+    versionCatalogs {
+        libs {
+            version('gradle', '7.5.1')
+            version('junit', '5.9.1')
+            version('content', '5.0.1')
+            library('junit-jupiter-api', 'org.junit.jupiter', 'junit-jupiter-api').versionRef('junit')
+            library('junit-jupiter-params', 'org.junit.jupiter', 'junit-jupiter-params').versionRef('junit')
+            library('junit-jupiter-engine', 'org.junit.jupiter', 'junit-jupiter-engine').versionRef('junit')
+            library('hamcrest', 'org.hamcrest', 'hamcrest-library').version('2.2')
+            // each content-* alias resolves to the org.xbib artifact of the same name
+            library('content-core', 'org.xbib', 'content-core').versionRef('content')
+            library('content-rdf', 'org.xbib', 'content-rdf').versionRef('content')
+            library('content-resource', 'org.xbib', 'content-resource').versionRef('content')
+            library('content-xml', 'org.xbib', 'content-xml').versionRef('content')
+            library('marc', 'org.xbib', 'marc').version('2.7.0')
+            library('charactersets', 'org.xbib', 'bibliographic-character-sets').version('2.0.0')
+        }
+    }
+}
+
 include 'oai-common'
 include 'oai-client'
 include 'oai-server'