diff --git a/build.gradle b/build.gradle index 7877e53..99ae975 100644 --- a/build.gradle +++ b/build.gradle @@ -4,7 +4,7 @@ plugins { } wrapper { - gradleVersion = "${project.property('gradle.wrapper.version')}" + gradleVersion = libs.versions.gradle.get() distributionType = Wrapper.DistributionType.ALL } diff --git a/gradle.properties b/gradle.properties index d1bfbe8..0b479b5 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,3 +1,5 @@ group = org.xbib name = oai version = 3.0.0 + +gradle.wrapper.version = 7.5.1 diff --git a/gradle/compile/java.gradle b/gradle/compile/java.gradle index b3744c5..9016125 100644 --- a/gradle/compile/java.gradle +++ b/gradle/compile/java.gradle @@ -6,13 +6,13 @@ java { } compileJava { - sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_11 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } compileTestJava { - sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_11 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } jar { diff --git a/gradle/test/junit5.gradle b/gradle/test/junit5.gradle index e1960cf..81b99a6 100644 --- a/gradle/test/junit5.gradle +++ b/gradle/test/junit5.gradle @@ -1,12 +1,8 @@ - -def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.8.2' -def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2' - dependencies { - testImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}" - testImplementation "org.junit.jupiter:junit-jupiter-params:${junitVersion}" - testImplementation "org.hamcrest:hamcrest-library:${hamcrestVersion}" - testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${junitVersion}" + testImplementation libs.junit.jupiter.api + testImplementation libs.junit.jupiter.params + testImplementation libs.hamcrest + testRuntimeOnly 
libs.junit.jupiter.engine } test { diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 7454180..249e583 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index ac0b842..8fad3f5 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index 1b6c787..a69d9cb 100755 --- a/gradlew +++ b/gradlew @@ -205,6 +205,12 @@ set -- \ org.gradle.wrapper.GradleWrapperMain \ "$@" +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + # Use "xargs" to parse quoted args. # # With -n1 it outputs one arg per line, with the quotes and backslashes removed. diff --git a/gradlew.bat b/gradlew.bat index ac1b06f..53a6b23 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -14,7 +14,7 @@ @rem limitations under the License. @rem -@if "%DEBUG%" == "" @echo off +@if "%DEBUG%"=="" @echo off @rem ########################################################################## @rem @rem Gradle startup script for Windows @@ -25,7 +25,7 @@ if "%OS%"=="Windows_NT" setlocal set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. +if "%DIRNAME%"=="" set DIRNAME=. set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% @@ -40,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto execute +if %ERRORLEVEL% equ 0 goto execute echo. echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 
@@ -75,13 +75,15 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar :end @rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd +if %ERRORLEVEL% equ 0 goto mainEnd :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% :mainEnd if "%OS%"=="Windows_NT" endlocal diff --git a/oai-client/build.gradle b/oai-client/build.gradle index d1311c6..051227c 100644 --- a/oai-client/build.gradle +++ b/oai-client/build.gradle @@ -1,5 +1,5 @@ dependencies { api project(':oai-common') - testImplementation "org.xbib:marc:${project.property('xbib-marc.version')}" - testImplementation "org.xbib:bibliographic-character-sets:${project.property('xbib-bibliographic-character-sets.version')}" + testImplementation libs.marc + testImplementation libs.charactersets } diff --git a/oai-client/src/main/java/module-info.java b/oai-client/src/main/java/module-info.java index 7620b78..06fcbb0 100644 --- a/oai-client/src/main/java/module-info.java +++ b/oai-client/src/main/java/module-info.java @@ -7,7 +7,7 @@ module org.xbib.oai.client { exports org.xbib.oai.client.listrecords; exports org.xbib.oai.client.listsets; requires org.xbib.oai; - requires org.xbib.net.url; + //requires org.xbib.net; requires org.xbib.content.xml; requires java.xml; requires java.logging; diff --git a/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java b/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java index e54cef4..ccaf8ca 100644 --- a/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java +++ b/oai-client/src/main/java/org/xbib/oai/client/OAIClient.java @@ -1,11 +1,11 @@ package org.xbib.oai.client; -import org.xbib.net.URL; import org.xbib.oai.OAIConstants; import 
org.xbib.oai.client.identify.IdentifyRequest; import org.xbib.oai.client.identify.IdentifyResponse; import org.xbib.oai.client.listrecords.ListRecordsRequest; import org.xbib.oai.client.listrecords.ListRecordsResponse; +import org.xbib.oai.client.util.UrlBuilder; import org.xbib.oai.exceptions.NoRecordsMatchException; import org.xbib.oai.util.ResumptionToken; import org.xbib.oai.xml.MetadataHandler; @@ -73,7 +73,7 @@ public class OAIClient { public IdentifyResponse identify() throws IOException, InterruptedException { IdentifyRequest identifyRequest = new IdentifyRequest(); IdentifyResponse identifyResponse = new IdentifyResponse(); - URL.Builder url = URL.from(baseURL).mutator(); + UrlBuilder url = UrlBuilder.fromUrl(baseURL); identifyRequest.getParams().forEach(url::queryParam); HttpRequest httpRequest = HttpRequest.newBuilder() .uri(URI.create(url.build().toExternalForm())) @@ -180,7 +180,7 @@ public class OAIClient { listRecordsRequest.addHandler(handler); } ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest); - URL.Builder url = URL.from(baseURL).mutator(); + UrlBuilder url = UrlBuilder.fromUrl(baseURL); // kind of hacky here - suppress all OAI params if resumption token is present if (listRecordsRequest.getResumptionToken() == null) { listRecordsRequest.getParams().forEach(url::queryParam); @@ -188,6 +188,7 @@ public class OAIClient { url.queryParam(OAIConstants.VERB_PARAMETER, OAIConstants.LIST_RECORDS); url.queryParam(OAIConstants.RESUMPTION_TOKEN_PARAMETER, listRecordsRequest.getResumptionToken().toString()); } + URI uri = URI.create(url.build().toExternalForm()); HttpRequest httpRequest = HttpRequest.newBuilder() .uri(uri) diff --git a/oai-client/src/main/java/org/xbib/oai/client/util/PercentDecoder.java b/oai-client/src/main/java/org/xbib/oai/client/util/PercentDecoder.java new file mode 100755 index 0000000..4cef1fd --- /dev/null +++ b/oai-client/src/main/java/org/xbib/oai/client/util/PercentDecoder.java @@ -0,0 +1,196 
/**
 * Decodes percent-encoded (%XX) text into a {@link String} using a caller-supplied {@link CharsetDecoder}.
 *
 * <p>Consecutive %XX triples are collected as raw bytes and decoded together, so multi-byte characters
 * such as {@code %E2%82%AC} are handled. Every character that is not part of a triple is copied to the
 * output unchanged.
 *
 * <p>An instance reuses its internal buffers and its decoder between calls; do not share one instance
 * between threads without external synchronization.
 */
public final class PercentDecoder {

    /**
     * Smallest decoded-char buffer the decode loop can make progress with. JDK decoders report overflow
     * rather than emit half of a surrogate pair, so with fewer than two free chars the loop in
     * {@link #handleEncodedBytes()} would never consume its input.
     */
    private static final int MIN_DECODED_CHAR_BUF_SIZE = 2;

    /**
     * Bytes represented by the current sequence of %-triples. Replaced by a larger buffer as needed.
     */
    private ByteBuffer encodedBuf;

    /**
     * Scratch buffer the decoder writes decoded chars into before they are appended to the output.
     */
    private final CharBuffer decodedCharBuf;

    private final CharsetDecoder decoder;

    /**
     * The decoded string for the current input.
     */
    private final StringBuilder outputBuf = new StringBuilder();

    /**
     * Construct a new PercentDecoder with default buffer sizes (16 encoded bytes, 16 decoded chars).
     *
     * @param charsetDecoder decoder used to turn the decoded bytes into chars
     * @throws NullPointerException if charsetDecoder is null
     * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int)
     */
    public PercentDecoder(CharsetDecoder charsetDecoder) {
        this(charsetDecoder, 16, 16);
    }

    /**
     * @param charsetDecoder            decoder used to turn the decoded bytes into chars
     * @param initialEncodedByteBufSize initial size of the buffer that holds encoded bytes; it grows on
     *                                  demand, so 0 is allowed
     * @param decodedCharBufSize        size of the buffer that encoded bytes are decoded into; values
     *                                  below 2 are raised to 2 so the decode loop can always make progress
     * @throws NullPointerException     if charsetDecoder is null
     * @throws IllegalArgumentException if either size is negative
     */
    public PercentDecoder(CharsetDecoder charsetDecoder, int initialEncodedByteBufSize,
                          int decodedCharBufSize) {
        if (charsetDecoder == null) {
            throw new NullPointerException("charsetDecoder must not be null");
        }
        if (decodedCharBufSize < 0) {
            throw new IllegalArgumentException("decodedCharBufSize must not be negative: " + decodedCharBufSize);
        }
        // ByteBuffer.allocate rejects a negative size with IllegalArgumentException on its own
        encodedBuf = ByteBuffer.allocate(initialEncodedByteBufSize);
        decodedCharBuf = CharBuffer.allocate(Math.max(MIN_DECODED_CHAR_BUF_SIZE, decodedCharBufSize));
        decoder = charsetDecoder;
    }

    /**
     * @param input Input with %-encoded representation of characters in this instance's configured character set,
     *              e.g. "%20" for a space character
     * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters
     * @throws IllegalArgumentException     if a '%' is not followed by two hexadecimal digits
     * @throws MalformedInputException      if decoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String decode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        outputBuf.setLength(0);
        // this is almost always an underestimate of the size needed:
        // only a 4-byte encoding (which is 12 characters input) would cause this to be an overestimate
        outputBuf.ensureCapacity(input.length() / 8);
        encodedBuf.clear();

        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c != '%') {
                // a literal char ends the current run of %-triples: decode what was collected first
                handleEncodedBytes();

                outputBuf.append(c);
                continue;
            }

            if (i + 2 >= input.length()) {
                throw new IllegalArgumentException(
                        "Could not percent decode <" + input + ">: incomplete %-pair at position " + i);
            }

            if (!encodedBuf.hasRemaining()) {
                growEncodedBuf();
            }

            // note that we advance i here as we consume chars
            int msBits = Character.digit(input.charAt(++i), 16);
            int lsBits = Character.digit(input.charAt(++i), 16);

            if (msBits == -1 || lsBits == -1) {
                throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">");
            }

            msBits <<= 4;
            msBits |= lsBits;

            // msBits can only have 8 bits set, so cast is safe
            encodedBuf.put((byte) msBits);
        }

        handleEncodedBytes();

        return outputBuf.toString();
    }

    /**
     * Replace the full encoded-byte buffer with one of twice the capacity, keeping its contents.
     * A zero-capacity buffer grows to one byte, since doubling zero would never make room.
     */
    private void growEncodedBuf() {
        ByteBuffer largerBuf = ByteBuffer.allocate(Math.max(1, encodedBuf.capacity() * 2));
        encodedBuf.flip();
        largerBuf.put(encodedBuf);
        encodedBuf = largerBuf;
    }

    /**
     * Decode any buffered encoded bytes and write them to the output buf.
     */
    private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException {
        if (encodedBuf.position() == 0) {
            // nothing to do
            return;
        }

        decoder.reset();
        CoderResult coderResult;

        // switch to reading mode
        encodedBuf.flip();

        // keep decoding while the char buffer fills up and encoded bytes are still left over
        do {
            decodedCharBuf.clear();
            coderResult = decoder.decode(encodedBuf, decodedCharBuf, false);
            throwIfError(coderResult);
            appendDecodedChars();
        } while (coderResult.isOverflow() && encodedBuf.hasRemaining());

        // final decode with end-of-input flag
        decodedCharBuf.clear();
        coderResult = decoder.decode(encodedBuf, decodedCharBuf, true);
        throwIfError(coderResult);

        if (encodedBuf.hasRemaining()) {
            throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes");
        }
        if (!coderResult.isUnderflow()) {
            throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult);
        }

        appendDecodedChars();

        // we've finished the input, wrap it up
        encodedBuf.clear();
        flush();
    }

    /**
     * Flush the decoder's remaining state into the output. Must only be called when the input encoded bytes
     * buffer is empty.
     */
    private void flush() throws MalformedInputException, UnmappableCharacterException {
        decodedCharBuf.clear();

        CoderResult coderResult = decoder.flush(decodedCharBuf);
        appendDecodedChars();

        throwIfError(coderResult);

        if (!coderResult.isUnderflow()) {
            throw new IllegalStateException("Decoder flush resulted in " + coderResult);
        }
    }

    /**
     * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding
     * CharacterCodingException.
     *
     * @param coderResult result to check
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException {
        if (coderResult.isMalformed()) {
            throw new MalformedInputException(coderResult.length());
        }
        if (coderResult.isUnmappable()) {
            throw new UnmappableCharacterException(coderResult.length());
        }
    }

    /**
     * Flip the decoded char buf and append it to the string buf.
     */
    private void appendDecodedChars() {
        decodedCharBuf.flip();
        outputBuf.append(decodedCharBuf);
    }
}
/**
 * Encodes unsafe characters as a sequence of %XX hex-encoded bytes.
 *
 * <p>Characters whose code is set in the configured "safe" bit set are copied through unchanged. Every
 * other character (or surrogate pair) is converted to bytes with the configured {@link CharsetEncoder},
 * and each byte is written as '%' followed by two upper-case hex digits.
 *
 * <p>This is typically done when encoding components of URLs. See {@link UrlPercentEncoders} for
 * pre-configured PercentEncoder instances.
 *
 * <p>An instance reuses its internal buffers, its encoder and its string handler between calls; do not
 * share one instance between threads without external synchronization.
 */
public final class PercentEncoder {

    private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray();

    private final BitSet safeChars;
    private final CharsetEncoder encoder;
    /**
     * Pre-allocate a string handler to make the common case of encoding to a string faster
     */
    private final StringBuilderPercentEncoderOutputHandler stringHandler =
            new StringBuilderPercentEncoderOutputHandler();
    private final ByteBuffer encodedBytes;
    private final CharBuffer unsafeCharsToEncode;

    /**
     * @param safeChars      the set of chars to NOT encode, stored as a bitset with the int positions corresponding
     *                       to those chars set to true. Treated as read only.
     * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder
     *                       instances across threads.
     * @throws NullPointerException if either argument is null
     */
    public PercentEncoder(BitSet safeChars, CharsetEncoder charsetEncoder) {
        if (safeChars == null) {
            throw new NullPointerException("safeChars must not be null");
        }
        if (charsetEncoder == null) {
            throw new NullPointerException("charsetEncoder must not be null");
        }
        this.safeChars = safeChars;
        this.encoder = charsetEncoder;

        // maxBytesPerChar() is a float; truncate and add one so the byte buffer is never too small
        int maxBytesPerChar = 1 + (int) encoder.maxBytesPerChar();
        // need to handle surrogate pairs, so need to be able to handle 2 chars worth of stuff at once
        encodedBytes = ByteBuffer.allocate(maxBytesPerChar * 2);
        unsafeCharsToEncode = CharBuffer.allocate(2);
    }

    /**
     * Encode the input and pass output chars to a handler.
     *
     * @param input   input string
     * @param handler handler to call on each output character
     * @throws IllegalArgumentException     if the input contains a high surrogate that is not followed by a low
     *                                      surrogate
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public void encode(CharSequence input, StringBuilderPercentEncoderOutputHandler handler) throws
            MalformedInputException, UnmappableCharacterException {

        for (int i = 0; i < input.length(); i++) {

            char c = input.charAt(i);

            if (safeChars.get(c)) {
                handler.onOutputChar(c);
                continue;
            }

            // not a safe char
            unsafeCharsToEncode.clear();
            unsafeCharsToEncode.append(c);
            if (Character.isHighSurrogate(c)) {
                if (input.length() > i + 1) {
                    // get the low surrogate as well
                    char lowSurrogate = input.charAt(i + 1);
                    if (Character.isLowSurrogate(lowSurrogate)) {
                        unsafeCharsToEncode.append(lowSurrogate);
                        // the pair is encoded as one unit, so skip its second half
                        i++;
                    } else {
                        throw new IllegalArgumentException(
                                "Invalid UTF-16: Char " + (i) + " is a high surrogate (\\u" + Integer
                                        .toHexString(c) + "), but char " + (i + 1) + " is not a low surrogate (\\u" + Integer
                                        .toHexString(lowSurrogate) + ")");
                    }
                } else {
                    throw new IllegalArgumentException(
                            "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer
                                    .toHexString(c) + ")");
                }
            }

            flushUnsafeCharBuffer(handler);
        }
    }

    /**
     * Encode the input and return the resulting text as a String.
     *
     * @param input input string
     * @return the input string with every character that's not in safeChars turned into its byte representation via
     * the instance's encoder and then percent-encoded
     * @throws IllegalArgumentException     if the input contains a high surrogate that is not followed by a low
     *                                      surrogate
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String encode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        stringHandler.reset();
        stringHandler.ensureCapacity(input.length());
        encode(input, stringHandler);
        return stringHandler.getContents();
    }

    /**
     * Encode unsafeCharsToEncode to bytes as per charsetEncoder, then percent-encode those bytes into output.
     *
     * Side effects: unsafeCharsToEncode will be read from and cleared. encodedBytes will be cleared and written to.
     */
    private void flushUnsafeCharBuffer(StringBuilderPercentEncoderOutputHandler handler) throws
            MalformedInputException, UnmappableCharacterException {
        // need to read from the char buffer, which was most recently written to
        unsafeCharsToEncode.flip();

        encodedBytes.clear();

        encoder.reset();
        CoderResult result = encoder.encode(unsafeCharsToEncode, encodedBytes, true);
        checkResult(result);
        result = encoder.flush(encodedBytes);
        checkResult(result);

        // read contents of bytebuffer
        encodedBytes.flip();

        while (encodedBytes.hasRemaining()) {
            byte b = encodedBytes.get();
            handler.onOutputChar('%');
            // the 0xF mask discards the sign extension of negative bytes
            handler.onOutputChar(HEX_CODE[b >> 4 & 0xF]);
            handler.onOutputChar(HEX_CODE[b & 0xF]);
        }
    }

    /**
     * @param result result to check
     * @throws IllegalStateException        if result is overflow
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private static void checkResult(CoderResult result) throws MalformedInputException,
            UnmappableCharacterException {
        if (result.isOverflow()) {
            throw new IllegalStateException("Byte buffer overflow; this should not happen.");
        }
        if (result.isMalformed()) {
            throw new MalformedInputException(result.length());
        }
        if (result.isUnmappable()) {
            throw new UnmappableCharacterException(result.length());
        }
    }

    /**
     * Collects output chars in a reusable StringBuilder. Declared static because it uses no state of the
     * enclosing encoder, so it does not need a reference to it.
     */
    private static final class StringBuilderPercentEncoderOutputHandler {

        private final StringBuilder stringBuilder;

        StringBuilderPercentEncoderOutputHandler() {
            stringBuilder = new StringBuilder();
        }

        /** @return the chars collected since the last reset */
        String getContents() {
            return stringBuilder.toString();
        }

        /** Discard everything collected so far. */
        void reset() {
            stringBuilder.setLength(0);
        }

        /** Make room for at least {@code length} chars. */
        void ensureCapacity(int length) {
            stringBuilder.ensureCapacity(length);
        }

        /** Append one output char. */
        void onOutputChar(char c) {
            stringBuilder.append(c);
        }
    }
}
applied to path, query param, etc. + * + * Escaping rules are from RFC 3986, RFC 1738 and the HTML 4 spec + * This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make + * HTTP-useful URLs. + */ +public final class UrlBuilder { + + /** + * IPv6 address, cribbed from StackOverflow + */ + private static final Pattern IPV6_PATTERN = Pattern + .compile( + "\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z"); + + /** + * IPv4 dotted quad + */ + private static final Pattern IPV4_PATTERN = Pattern + .compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z"); + + private final String scheme; + + private final String host; + + private final Integer port; + + private final List> queryParams = new ArrayList<>(); + + /** + * If this is non-null, queryParams must be empty, and vice versa. + */ + private String unstructuredQuery; + + private final List pathSegments = new ArrayList<>(); + + private final PercentEncoder pathEncoder = getPathEncoder(); + private final PercentEncoder regNameEncoder = getRegNameEncoder(); + private final PercentEncoder matrixEncoder = getMatrixEncoder(); + private final PercentEncoder queryParamEncoder = getQueryParamEncoder(); + private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder(); + private final PercentEncoder fragmentEncoder = getFragmentEncoder(); + + private String fragment; + + private boolean forceTrailingSlash = false; + + /** + * Create a URL with UTF-8 encoding. + * + * @param scheme scheme (e.g. http) + * @param host host (e.g. foo.com or 1.2.3.4 or [::1]) + * @param port null or a positive integer + */ + private UrlBuilder(String scheme, String host, Integer port) { + this.host = host; + this.scheme = scheme; + this.port = port; + } + + /** + * Create a URL with an null port and UTF-8 encoding. + * + * @param scheme scheme (e.g. 
http) + * @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal + * ([::1]), excluding IPvFuture since no one uses that in practice + * @return a url builder + * @see UrlBuilder#forHost(String scheme, String host, int port) + */ + public static UrlBuilder forHost(String scheme, String host) { + return new UrlBuilder(scheme, host, null); + } + + /** + * @param scheme scheme (e.g. http) + * @param host host in any of the valid syntaxes: reg-name ( a dns name), ipv4 literal (1.2.3.4), ipv6 literal + * ([::1]), excluding IPvFuture since no one uses that in practice + * @param port port + * @return a url builder + */ + public static UrlBuilder forHost(String scheme, String host, int port) { + return new UrlBuilder(scheme, host, port); + } + + public static UrlBuilder fromUrl(String urlSpec) throws CharacterCodingException, MalformedURLException { + return fromUrl(new URL(urlSpec)); + } + + /** + * Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the + * query string apply. + * + * @param url url to initialize builder with + * @return a UrlBuilder containing the host, path, etc. from the url + * @throws CharacterCodingException if char decoding fails + * @see UrlBuilder#fromUrl(URL, CharsetDecoder) + */ + public static UrlBuilder fromUrl(URL url) throws CharacterCodingException { + return fromUrl(url, StandardCharsets.UTF_8.newDecoder()); + } + + /** + * Create a UrlBuilder initialized with the contents of a {@link URL}. + * + * The query string will be parsed into HTML4 query params if it can be separated into a + * &-separated sequence of key=value pairs. The sequence of query params can then be + * appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is + * only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that + * is in some other form. 
To represent this case, if the aforementioned param-parsing attempt fails, the query + * string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link + * UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls + * to {@link UrlBuilder#unstructuredQuery(String)}}, which replaces the entire query string, are allowed. + * + * @param url url to initialize builder with + * @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8) + * @return a UrlBuilder containing the host, path, etc. from the url + * @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to + * report errors + * @see UrlBuilder#fromUrl(URL, CharsetDecoder) + */ + public static UrlBuilder fromUrl(URL url, CharsetDecoder charsetDecoder) throws + CharacterCodingException { + + PercentDecoder decoder = new PercentDecoder(charsetDecoder); + // reg names must be encoded UTF-8 + PercentDecoder regNameDecoder; + if (charsetDecoder.charset().equals(StandardCharsets.UTF_8)) { + regNameDecoder = decoder; + } else { + regNameDecoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder()); + } + + Integer port = url.getPort(); + if (port == -1) { + port = null; + } + + UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port); + + buildFromPath(builder, decoder, url); + + buildFromQuery(builder, decoder, url); + + if (url.getRef() != null) { + builder.fragment(decoder.decode(url.getRef())); + } + + return builder; + } + + /** + * Add a path segment. + * + * @param segment a path segment + * @return this + */ + public UrlBuilder pathSegment(String segment) { + pathSegments.add(new PathSegment(segment)); + return this; + } + + /** + * Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}. 
+ * + * @param segments path segments + * @return this + */ + public UrlBuilder pathSegments(String... segments) { + for (String segment : segments) { + pathSegment(segment); + } + + return this; + } + + /** + * Add an HTML query parameter. Query parameters will be encoded in the order added. + * + * Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by + * HTML 4. + * + * If you use this method to build a query string, or created this builder from a url with a query string that can + * successfully be parsed into query param pairs, you cannot subsequently use {@link + * UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}. + * + * @param name param name + * @param value param value + * @return this + */ + public UrlBuilder queryParam(String name, String value) { + if (unstructuredQuery != null) { + throw new IllegalStateException( + "Cannot call queryParam() when this already has an unstructured query specified"); + } + + queryParams.add(Pair.of(name, value)); + return this; + } + + /** + * Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that + * is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite + * that query. + * + * If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you + * cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL, + * CharsetDecoder)}. + * + * @param query Complete URI query, as specified by RFC 3986. 
+ * @return this + */ + public UrlBuilder unstructuredQuery(String query) { + if (!queryParams.isEmpty()) { + throw new IllegalStateException( + "Cannot call unstructuredQuery() when this already has queryParam pairs specified"); + } + + unstructuredQuery = query; + + return this; + } + + /** + * Clear the unstructured query and any query params. + * + * Since the query / query param situation is a little complicated, this method will let you remove all query + * information and start again from scratch. This may be useful when taking an existing url, parsing it into a + * builder, and then re-doing its query params, for instance. + * + * @return this + */ + public UrlBuilder clearQuery() { + queryParams.clear(); + unstructuredQuery = null; + + return this; + } + + /** + * Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the + * root. Matrix params will be encoded in the order added. + * + * @param name param name + * @param value param value + * @return this + */ + public UrlBuilder matrixParam(String name, String value) { + if (pathSegments.isEmpty()) { + // create an empty path segment to represent a matrix param applied to the root + pathSegment(""); + } + + PathSegment seg = pathSegments.get(pathSegments.size() - 1); + seg.matrixParams.add(Pair.of(name, value)); + return this; + } + + /** + * Set the fragment. + * + * @param fragment fragment string + * @return this + */ + public UrlBuilder fragment(String fragment) { + this.fragment = fragment; + return this; + } + + /** + * Force the generated URL to have a trailing slash at the end of the path. + * + * @return this + */ + public UrlBuilder forceTrailingSlash() { + forceTrailingSlash = true; + return this; + } + + public URL build() throws CharacterCodingException, MalformedURLException { + return new URL(toUrlString()); + } + + /** + * Encode the current builder state into a URL string. 
+ * + * @return a well-formed URL string + * @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors + */ + public String toUrlString() throws CharacterCodingException { + StringBuilder buf = new StringBuilder(); + + buf.append(scheme); + buf.append("://"); + + buf.append(encodeHost(host)); + if (port != null) { + buf.append(':'); + buf.append(port); + } + + for (PathSegment pathSegment : pathSegments) { + buf.append('/'); + buf.append(pathEncoder.encode(pathSegment.segment)); + + for (Pair matrixParam : pathSegment.matrixParams) { + buf.append(';'); + buf.append(matrixEncoder.encode(matrixParam.getKey())); + buf.append('='); + buf.append(matrixEncoder.encode(matrixParam.getValue())); + } + } + + if (forceTrailingSlash) { + buf.append('/'); + } + + if (!queryParams.isEmpty()) { + buf.append("?"); + Iterator> qpIter = queryParams.iterator(); + while (qpIter.hasNext()) { + Pair queryParam = qpIter.next(); + buf.append(queryParamEncoder.encode(queryParam.getKey())); + buf.append('='); + buf.append(queryParamEncoder.encode(queryParam.getValue())); + if (qpIter.hasNext()) { + buf.append('&'); + } + } + } else if (unstructuredQuery != null) { + buf.append("?"); + buf.append(unstructuredQueryEncoder.encode(unstructuredQuery)); + } + + if (fragment != null) { + buf.append('#'); + buf.append(fragmentEncoder.encode(fragment)); + } + + return buf.toString(); + } + + /** + * Populate a url builder based on the query of a url + * + * @param builder builder + * @param decoder decoder + * @param url url + * @throws CharacterCodingException + */ + private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws + CharacterCodingException { + if (url.getQuery() != null) { + String q = url.getQuery(); + + // try to parse into &-separated key=value pairs + List> pairs = new ArrayList<>(); + boolean parseOk = true; + + for (String queryChunk : q.split("&")) { + String[] queryParamChunks = 
queryChunk.split("="); + + if (queryParamChunks.length != 2) { + parseOk = false; + break; + } + + pairs.add(Pair.of(decoder.decode(queryParamChunks[0]), + decoder.decode(queryParamChunks[1]))); + } + + if (parseOk) { + for (Pair pair : pairs) { + builder.queryParam(pair.getKey(), pair.getValue()); + } + } else { + builder.unstructuredQuery(decoder.decode(q)); + } + } + } + + /** + * Populate the path segments of a url builder from a url + * + * @param builder builder + * @param decoder decoder + * @param url url + * @throws CharacterCodingException + */ + private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws + CharacterCodingException { + for (String pathChunk : url.getPath().split("/")) { + if (pathChunk.equals("")) { + continue; + } + + if (pathChunk.charAt(0) == ';') { + builder.pathSegment(""); + // empty path segment, but matrix params + for (String matrixChunk : pathChunk.substring(1).split(";")) { + buildFromMatrixParamChunk(decoder, builder, matrixChunk); + } + + continue; + } + + // otherwise, path chunk is non empty and does not start with a ';' + + String[] matrixChunks = pathChunk.split(";"); + + // first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will + // not be included in the final url. 
+ builder.pathSegment(decoder.decode(matrixChunks[0])); + + // if there any other chunks, they're matrix param pairs + for (int i = 1; i < matrixChunks.length; i++) { + buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]); + } + } + } + + private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws + CharacterCodingException { + String[] mtxPair = pathMatrixChunk.split("="); + if (mtxPair.length != 2) { + throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">"); + } + + String mtxName = mtxPair[0]; + String mtxVal = mtxPair[1]; + ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal)); + } + + /** + * @param host original host string + * @return host encoded as in RFC 3986 section 3.2.2 + */ + private String encodeHost(String host) throws CharacterCodingException { + // matching order: IP-literal, IPv4, reg-name + if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) { + return host; + } + + // it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL) + return regNameEncoder.encode(host); + } + + /** + * Bundle of a path segment name and any associated matrix params. 
+ */ + private static class PathSegment { + private final String segment; + private final List> matrixParams = new ArrayList<>(); + + PathSegment(String segment) { + this.segment = segment; + } + } + + static class Pair { + + K key; + + V value; + + Pair(K key, V value) { + this.key = key; + this.value = value; + } + + static Pair of(K key, V value) { + return new Pair<>(key, value); + } + + K getKey() { + return key; + } + + V getValue() { + return value; + } + + } +} diff --git a/oai-client/src/main/java/org/xbib/oai/client/util/UrlPercentEncoders.java b/oai-client/src/main/java/org/xbib/oai/client/util/UrlPercentEncoders.java new file mode 100755 index 0000000..222fd98 --- /dev/null +++ b/oai-client/src/main/java/org/xbib/oai/client/util/UrlPercentEncoders.java @@ -0,0 +1,164 @@ +package org.xbib.oai.client.util; + +import java.nio.charset.StandardCharsets; +import java.util.BitSet; +import static java.nio.charset.CodingErrorAction.REPLACE; + +/** + * See RFC 3986, RFC 1738 and http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding. + */ +public final class UrlPercentEncoders { + + /** + * an encoder for RFC 3986 reg-names + */ + + private static final BitSet REG_NAME_BIT_SET = new BitSet(); + + private static final BitSet PATH_BIT_SET = new BitSet(); + private static final BitSet MATRIX_BIT_SET = new BitSet(); + private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet(); + private static final BitSet QUERY_PARAM_BIT_SET = new BitSet(); + private static final BitSet FRAGMENT_BIT_SET = new BitSet(); + + static { + // RFC 3986 'reg-name'. This is not very aggressive... it's quite possible to have DNS-illegal names out of this. + // Regardless, it will at least be URI-compliant even if it's not HTTP URL-compliant. + addUnreserved(REG_NAME_BIT_SET); + addSubdelims(REG_NAME_BIT_SET); + + // Represents RFC 3986 'pchar'. Remove delimiter that starts matrix section. 
+ addPChar(PATH_BIT_SET); + PATH_BIT_SET.clear((int) ';'); + + // Remove delims for HTTP matrix params as per RFC 1738 S3.3. The other reserved chars ('/' and '?') are already excluded. + addPChar(MATRIX_BIT_SET); + MATRIX_BIT_SET.clear((int) ';'); + MATRIX_BIT_SET.clear((int) '='); + + /* + * At this point it represents RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also + * specifies that "+" can mean space in a query, so we will make sure to say that '+' is not safe to leave as-is + */ + addQuery(UNSTRUCTURED_QUERY_BIT_SET); + UNSTRUCTURED_QUERY_BIT_SET.clear((int) '+'); + + /* + * Create more stringent requirements for HTML4 queries: remove delimiters for HTML query params so that key=value + * pairs can be used. + */ + QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET); + QUERY_PARAM_BIT_SET.clear((int) '='); + QUERY_PARAM_BIT_SET.clear((int) '&'); + + addFragment(FRAGMENT_BIT_SET); + } + + public static PercentEncoder getRegNameEncoder() { + return new PercentEncoder(REG_NAME_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getPathEncoder() { + return new PercentEncoder(PATH_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getMatrixEncoder() { + return new PercentEncoder(MATRIX_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getUnstructuredQueryEncoder() { + return new PercentEncoder(UNSTRUCTURED_QUERY_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder getQueryParamEncoder() { + return new PercentEncoder(QUERY_PARAM_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + public static PercentEncoder 
getFragmentEncoder() { + return new PercentEncoder(FRAGMENT_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + private UrlPercentEncoders() { + } + + /** + * Add code points for 'fragment' chars + * + * @param fragmentBitSet bit set + */ + private static void addFragment(BitSet fragmentBitSet) { + addPChar(fragmentBitSet); + fragmentBitSet.set((int) '/'); + fragmentBitSet.set((int) '?'); + } + + /** + * Add code points for 'query' chars + * + * @param queryBitSet bit set + */ + private static void addQuery(BitSet queryBitSet) { + addPChar(queryBitSet); + queryBitSet.set((int) '/'); + queryBitSet.set((int) '?'); + } + + /** + * Add code points for 'pchar' chars. + * + * @param bs bitset + */ + private static void addPChar(BitSet bs) { + addUnreserved(bs); + addSubdelims(bs); + bs.set((int) ':'); + bs.set((int) '@'); + } + + /** + * Add codepoints for 'unreserved' chars + * + * @param bs bitset to add codepoints to + */ + private static void addUnreserved(BitSet bs) { + + for (int i = 'a'; i <= 'z'; i++) { + bs.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + bs.set(i); + } + for (int i = '0'; i <= '9'; i++) { + bs.set(i); + } + bs.set((int) '-'); + bs.set((int) '.'); + bs.set((int) '_'); + bs.set((int) '~'); + } + + /** + * Add codepoints for 'sub-delims' chars + * + * @param bs bitset to add codepoints to + */ + private static void addSubdelims(BitSet bs) { + bs.set((int) '!'); + bs.set((int) '$'); + bs.set((int) '&'); + bs.set((int) '\''); + bs.set((int) '('); + bs.set((int) ')'); + bs.set((int) '*'); + bs.set((int) '+'); + bs.set((int) ','); + bs.set((int) ';'); + bs.set((int) '='); + } +} diff --git a/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java b/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java index b8c3ca3..933515a 100644 --- a/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java +++ 
b/oai-client/src/test/java/org/xbib/oai/client/DOAJClientTest.java @@ -23,13 +23,15 @@ class DOAJClientTest { IdentifyResponse identifyResponse = oaiClient.identify(); String granularity = identifyResponse.getGranularity(); logger.log(Level.INFO, "granularity = " + granularity); - DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ? - DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null; + // override granularity because of "bad arguments" error. Seems DOAJ is unable to manage it's own declared granularity. + DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")); Handler handler = new Handler(); SplitWriter splitWriter = new SplitWriter("build/doaj-%d.xml", -1, 8192, false); oaiClient.setSplitWriter(splitWriter); + Instant to = Instant.now(); + Instant from = to.atZone(ZoneId.systemDefault()).minusMonths(1).toInstant(); oaiClient.listRecords("oai_dc", null, - dateTimeFormatter,Instant.parse("2021-05-01T00:00:00Z"), Instant.parse("2021-06-01T00:00:00Z"), null, + dateTimeFormatter, from, to, null, handler, null); logger.log(Level.INFO, "count = " + handler.count()); assertTrue(handler.count() > 0); diff --git a/oai-client/src/test/java/org/xbib/oai/client/util/PercentEncoderTest.java b/oai-client/src/test/java/org/xbib/oai/client/util/PercentEncoderTest.java new file mode 100755 index 0000000..9a61649 --- /dev/null +++ b/oai-client/src/test/java/org/xbib/oai/client/util/PercentEncoderTest.java @@ -0,0 +1,84 @@ +package org.xbib.oai.client.util; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.nio.charset.CharacterCodingException; +import java.nio.charset.MalformedInputException; +import java.nio.charset.StandardCharsets; +import java.nio.charset.UnmappableCharacterException; +import java.util.BitSet; + +import static java.nio.charset.CodingErrorAction.REPLACE; +import static org.junit.jupiter.api.Assertions.assertEquals; + 
+public final class PercentEncoderTest { + + private static PercentEncoder alnum; + private static PercentEncoder alnum16; + + @BeforeAll + public static void setUp() { + BitSet bs = new BitSet(); + for (int i = 'a'; i <= 'z'; i++) { + bs.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + bs.set(i); + } + for (int i = '0'; i <= '9'; i++) { + bs.set(i); + } + + alnum = new PercentEncoder(bs, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + alnum16 = new PercentEncoder(bs, StandardCharsets.UTF_16BE.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + } + + @Test + public void testDoesntEncodeSafe() throws CharacterCodingException { + BitSet set = new BitSet(); + for (int i = 'a'; i <= 'z'; i++) { + set.set(i); + } + + PercentEncoder pe = new PercentEncoder(set, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE) + .onUnmappableCharacter(REPLACE)); + assertEquals("abcd%41%42%43%44", pe.encode("abcdABCD")); + } + + @Test + public void testEncodeInBetweenSafe() throws MalformedInputException, UnmappableCharacterException { + assertEquals("abc%20123", alnum.encode("abc 123")); + } + + @Test + public void testSafeInBetweenEncoded() throws MalformedInputException, UnmappableCharacterException { + assertEquals("%20abc%20", alnum.encode(" abc ")); + } + + @Test + public void testEncodeUtf8() throws CharacterCodingException { + // 1 UTF-16 char (unicode snowman) + assertEquals("snowman%E2%98%83", alnum.encode("snowman\u2603")); + } + + @Test + public void testEncodeUtf8SurrogatePair() throws CharacterCodingException { + // musical G clef: 1d11e, has to be represented in surrogate pair form + assertEquals("clef%F0%9D%84%9E", alnum.encode("clef\ud834\udd1e")); + } + + @Test + public void testEncodeUtf16() throws CharacterCodingException { + // 1 UTF-16 char (unicode snowman) + assertEquals("snowman%26%03", alnum16.encode("snowman\u2603")); + } + + @Test + public void 
testUrlEncodedUtf16SurrogatePair() throws CharacterCodingException { + // musical G clef: 1d11e, has to be represented in surrogate pair form + assertEquals("clef%D8%34%DD%1E", alnum16.encode("clef\ud834\udd1e")); + } +} diff --git a/oai-client/src/test/java/org/xbib/oai/client/util/UrlBuilderTest.java b/oai-client/src/test/java/org/xbib/oai/client/util/UrlBuilderTest.java new file mode 100755 index 0000000..0d3d30a --- /dev/null +++ b/oai-client/src/test/java/org/xbib/oai/client/util/UrlBuilderTest.java @@ -0,0 +1,425 @@ +package org.xbib.oai.client.util; + +import org.junit.jupiter.api.Test; + +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.CharacterCodingException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +public final class UrlBuilderTest { + + @Test + public void testNoUrlParts() throws Exception { + assertUrlEquals("http://foo.com", UrlBuilder.forHost("http", "foo.com").toUrlString()); + } + + @Test + public void testWithPort() throws Exception { + assertUrlEquals("http://foo.com:33", UrlBuilder.forHost("http", "foo.com", 33).toUrlString()); + } + + @Test + public void testSimplePath() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.pathSegment("seg1").pathSegment("seg2"); + assertUrlEquals("http://foo.com/seg1/seg2", ub.toUrlString()); + } + + @Test + public void testPathWithReserved() throws Exception { + // RFC 1738 S3.3 + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.pathSegment("seg/;?ment").pathSegment("seg=&2"); + assertUrlEquals("http://foo.com/seg%2F%3B%3Fment/seg=&2", ub.toUrlString()); + } + + @Test + public void testPathSegments() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.pathSegments("seg1", "seg2", "seg3"); + assertUrlEquals("http://foo.com/seg1/seg2/seg3", ub.toUrlString()); + } + + 
@Test + public void testMatrixWithoutPathHasLeadingSlash() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.matrixParam("foo", "bar"); + assertUrlEquals("http://foo.com/;foo=bar", ub.toUrlString()); + } + + @Test + public void testMatrixWithReserved() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com") + .pathSegment("foo") + .matrixParam("foo", "bar") + .matrixParam("res;=?#/erved", "value") + .pathSegment("baz"); + assertUrlEquals("http://foo.com/foo;foo=bar;res%3B%3D%3F%23%2Ferved=value/baz", ub.toUrlString()); + } + + @Test + public void testUrlEncodedPathSegmentUtf8() throws Exception { + // 1 UTF-16 char + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.pathSegment("snowman").pathSegment("\u2603"); + assertUrlEquals("http://foo.com/snowman/%E2%98%83", ub.toUrlString()); + } + + @Test + public void testUrlEncodedPathSegmentUtf8SurrogatePair() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + // musical G clef: 1d11e, has to be represented in surrogate pair form + ub.pathSegment("clef").pathSegment("\ud834\udd1e"); + assertUrlEquals("http://foo.com/clef/%F0%9D%84%9E", ub.toUrlString()); + } + + @Test + public void testQueryParamNoPath() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.queryParam("foo", "bar"); + String s = ub.toUrlString(); + assertUrlEquals("http://foo.com?foo=bar", s); + } + + @Test + public void testQueryParamsDuplicated() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.queryParam("foo", "bar"); + ub.queryParam("foo", "bar2"); + ub.queryParam("baz", "quux"); + ub.queryParam("baz", "quux2"); + assertUrlEquals("http://foo.com?foo=bar&foo=bar2&baz=quux&baz=quux2", ub.toUrlString()); + } + + @Test + public void testEncodeQueryParams() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.queryParam("foo", "bar&=#baz"); + ub.queryParam("foo", "bar?/2"); + 
assertUrlEquals("http://foo.com?foo=bar%26%3D%23baz&foo=bar?/2", ub.toUrlString()); + } + + @Test + public void testEncodeQueryParamWithSpaceAndPlus() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.queryParam("foo", "spa ce"); + ub.queryParam("fo+o", "plus+"); + assertUrlEquals("http://foo.com?foo=spa%20ce&fo%2Bo=plus%2B", ub.toUrlString()); + } + + @Test + public void testPlusInVariousParts() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + + ub.pathSegment("has+plus") + .matrixParam("plusMtx", "pl+us") + .queryParam("plusQp", "pl+us") + .fragment("plus+frag"); + + assertUrlEquals("http://foo.com/has+plus;plusMtx=pl+us?plusQp=pl%2Bus#plus+frag", ub.toUrlString()); + } + + @Test + public void testFragment() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com"); + ub.queryParam("foo", "bar"); + ub.fragment("#frag/?"); + assertUrlEquals("http://foo.com?foo=bar#%23frag/?", ub.toUrlString()); + } + + @Test + public void testAllParts() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("https", "foo.bar.com", 3333); + ub.pathSegment("foo"); + ub.pathSegment("bar"); + ub.matrixParam("mtx1", "val1"); + ub.matrixParam("mtx2", "val2"); + ub.queryParam("q1", "v1"); + ub.queryParam("q2", "v2"); + ub.fragment("zomg it's a fragment"); + + assertEquals("https://foo.bar.com:3333/foo/bar;mtx1=val1;mtx2=val2?q1=v1&q2=v2#zomg%20it's%20a%20fragment", + ub.toUrlString()); + } + + @Test + public void testIPv4Literal() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "127.0.0.1"); + assertUrlEquals("http://127.0.0.1", ub.toUrlString()); + } + + @Test + public void testBadIPv4LiteralDoesntChoke() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "300.100.50.1"); + assertUrlEquals("http://300.100.50.1", ub.toUrlString()); + } + + @Test + public void testIPv6LiteralLocalhost() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "[::1]"); + 
assertUrlEquals("http://[::1]", ub.toUrlString()); + } + + @Test + public void testIPv6Literal() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "[2001:db8:85a3::8a2e:370:7334]"); + assertUrlEquals("http://[2001:db8:85a3::8a2e:370:7334]", ub.toUrlString()); + } + + @Test + public void testEncodedRegNameSingleByte() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "host?name;"); + assertUrlEquals("http://host%3Fname;", ub.toUrlString()); + } + + @Test + public void testEncodedRegNameMultiByte() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "snow\u2603man"); + assertUrlEquals("http://snow%E2%98%83man", ub.toUrlString()); + } + + @Test + public void testForceTrailingSlash() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c"); + + assertUrlEquals("https://foo.com/a/b/c/", ub.toUrlString()); + } + + @Test + public void testForceTrailingSlashWithQueryParams() throws Exception { + UrlBuilder ub = + UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c").queryParam("foo", "bar"); + + assertUrlEquals("https://foo.com/a/b/c/?foo=bar", ub.toUrlString()); + } + + @Test + public void testForceTrailingSlashNoPathSegmentsWithMatrixParams() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().matrixParam("m1", "v1"); + + assertUrlEquals("https://foo.com/;m1=v1/", ub.toUrlString()); + } + + @Test + public void testIntermingledMatrixParamsAndPathSegments() throws Exception { + + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com") + .pathSegments("seg1", "seg2") + .matrixParam("m1", "v1") + .pathSegment("seg3") + .matrixParam("m2", "v2"); + + assertUrlEquals("http://foo.com/seg1/seg2;m1=v1/seg3;m2=v2", ub.toUrlString()); + } + + @Test + public void testFromUrlWithEverything() throws URISyntaxException, CharacterCodingException, MalformedURLException { + String orig = + 
"https://foo.bar.com:3333/foo/ba%20r;mtx1=val1;mtx2=val%202/seg%203;m2=v2?q1=v1&q2=v%202#zomg%20it's%20a%20fragment"; + assertUrlBuilderRoundtrip(orig); + } + + @Test + public void testFromUrlWithEmptyPath() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com"); + } + + @Test + public void testFromUrlWithEmptyPathAndSlash() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/", "http://foo.com"); + } + + @Test + public void testFromUrlWithPort() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com:1234"); + } + + @Test + public void testFromUrlWithEmptyPathSegent() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo//", "http://foo.com/foo"); + } + + @Test + public void testFromUrlWithEncodedHost() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://f%20oo.com/bar"); + } + + @Test + public void testFromUrlWithEncodedPathSegment() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo/b%20ar"); + } + + @Test + public void testFromUrlWithEncodedMatrixParam() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo;m1=v1;m%202=v%202"); + } + + @Test + public void testFromUrlWithEncodedQueryParam() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo?q%201=v%202&q2=v2"); + } + + @Test + public void testFromUrlWithEncodedQueryParamDelimiter() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo?q1=%3Dv1&%26q2=v2"); + } + + @Test + public void 
testFromUrlWithEncodedFragment() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo#b%20ar"); + } + + @Test + public void testFromUrlWithMalformedMatrixPair() throws MalformedURLException, CharacterCodingException { + try { + UrlBuilder.fromUrl("http://foo.com/foo;m1=v1=v2"); + fail(); + } catch (IllegalArgumentException e) { + assertEquals("Malformed matrix param: ", e.getMessage()); + } + } + + @Test + public void testFromUrlWithEmptyPathSegmentWithMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo/;m1=v1"); + } + + @Test + public void testFromUrlWithEmptyPathWithMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/;m1=v1"); + } + + @Test + public void testFromUrlWithEmptyPathWithMultipleMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/;m1=v1;m2=v2"); + } + + @Test + public void testFromUrlWithPathSegmentEndingWithSemicolon() throws URISyntaxException, CharacterCodingException, MalformedURLException { + assertUrlBuilderRoundtrip("http://foo.com/foo;", "http://foo.com/foo"); + } + + @Test + public void testPercentDecodeInvalidPair() throws MalformedURLException, CharacterCodingException { + try { + UrlBuilder.fromUrl("http://foo.com/fo%2o"); + fail(); + } catch (IllegalArgumentException e) { + assertEquals("Invalid %-tuple <%2o>", e.getMessage()); + } + } + + @Test + public void testFromUrlMalformedQueryParamMultiValues() throws MalformedURLException, CharacterCodingException, URISyntaxException { + assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1=v2"); + } + + @Test + public void testFromUrlMalformedQueryParamNoValue() throws MalformedURLException, CharacterCodingException, URISyntaxException { + 
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1&q2"); + } + + @Test + public void testFromUrlUnstructuredQueryWithEscapedChars() throws MalformedURLException, CharacterCodingException, URISyntaxException { + assertUrlBuilderRoundtrip("http://foo.com/foo?query==&%23"); + } + + @Test + public void testCantUseQueryParamAfterQuery() { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q"); + + try { + ub.queryParam("foo", "bar"); + fail(); + } catch (IllegalStateException e) { + assertEquals("Cannot call queryParam() when this already has an unstructured query specified", + e.getMessage()); + } + } + + @Test + public void testCantUseQueryAfterQueryParam() { + UrlBuilder ub = UrlBuilder.forHost("http", "foo.com").queryParam("foo", "bar"); + + try { + ub.unstructuredQuery("q"); + + fail(); + } catch (IllegalStateException e) { + assertEquals("Cannot call unstructuredQuery() when this already has queryParam pairs specified", + e.getMessage()); + } + } + + @Test + public void testUnstructuredQueryWithNoSpecialChars() throws Exception { + assertUrlEquals("http://foo.com?q", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q").toUrlString()); + } + + @Test + public void testUnstructuredQueryWithOkSpecialChars() throws Exception { + assertUrlEquals("http://foo.com?q?/&=", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q?/&=").toUrlString()); + } + + @Test + public void testUnstructuredQueryWithEscapedSpecialChars() throws Exception { + assertUrlEquals("http://foo.com?q%23%2B", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q#+").toUrlString()); + } + + @Test + public void testClearQueryRemovesQueryParam() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "host") + .queryParam("foo", "bar") + .clearQuery(); + assertUrlEquals("http://host", ub.toUrlString()); + } + + @Test + public void testClearQueryRemovesUnstructuredQuery() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "host") + 
.unstructuredQuery("foobar") + .clearQuery(); + assertUrlEquals("http://host", ub.toUrlString()); + } + + @Test + public void testClearQueryAfterQueryParamAllowsQuery() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "host") + .queryParam("foo", "bar") + .clearQuery() + .unstructuredQuery("foobar"); + assertUrlEquals("http://host?foobar", ub.toUrlString()); + } + + @Test + public void testClearQueryAfterQueryAllowsQueryParam() throws Exception { + UrlBuilder ub = UrlBuilder.forHost("http", "host") + .unstructuredQuery("foobar") + .clearQuery() + .queryParam("foo", "bar"); + assertUrlEquals("http://host?foo=bar", ub.toUrlString()); + } + + private void assertUrlBuilderRoundtrip(String url) throws MalformedURLException, CharacterCodingException, URISyntaxException { + assertUrlBuilderRoundtrip(url, url); + } + + /** + * @param origUrl the url that will be used to create a URL + * @param finalUrl the URL string it should end up as + */ + private void assertUrlBuilderRoundtrip(String origUrl, String finalUrl) throws MalformedURLException, CharacterCodingException, URISyntaxException { + assertUrlEquals(finalUrl, UrlBuilder.fromUrl(new URL(origUrl)).toUrlString()); + } + + private static void assertUrlEquals(String expected, String actual) throws URISyntaxException, MalformedURLException { + assertEquals(expected, actual); + assertEquals(expected, new URI(actual).toString()); + assertEquals(expected, new URL(actual).toString()); + } +} diff --git a/oai-common/build.gradle b/oai-common/build.gradle index ef91f94..733943d 100644 --- a/oai-common/build.gradle +++ b/oai-common/build.gradle @@ -1,6 +1,6 @@ dependencies { - api "org.xbib:content-core:${project.property('xbib-content.version')}" - api "org.xbib:content-rdf:${project.property('xbib-content.version')}" - api "org.xbib:content-resource:${project.property('xbib-content.version')}" - api "org.xbib:content-xml:${project.property('xbib-content.version')}" + api libs.content.core + api libs.content.rdf 
+ api libs.content.resource + api libs.content.xml } diff --git a/settings.gradle b/settings.gradle index a1d9ded..aa5ef68 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,3 +1,23 @@ +dependencyResolutionManagement { + versionCatalogs { + libs { + version('gradle', '7.5.1') + version('junit', '5.9.1') + version('content', '5.0.1') + library('junit-jupiter-api', 'org.junit.jupiter', 'junit-jupiter-api').versionRef('junit') + library('junit-jupiter-params', 'org.junit.jupiter', 'junit-jupiter-params').versionRef('junit') + library('junit-jupiter-engine', 'org.junit.jupiter', 'junit-jupiter-engine').versionRef('junit') + library('hamcrest', 'org.hamcrest', 'hamcrest-library').version('2.2') + library('content-core', 'org.xbib', 'content-json').versionRef('content') + library('content-rdf', 'org.xbib', 'content-rdf').versionRef('content') + library('content-resource', 'org.xbib', 'content-resource').versionRef('content') + library('content-xml', 'org.xbib', 'content-xml').versionRef('content') + library('marc', 'org.xbib', 'marc').version('2.7.0') + library('charactersets', 'org.xbib', 'bibliographic-character-sets').version('2.0.0') + } + } +} + include 'oai-common' include 'oai-client' include 'oai-server'