update to Gradle 7.5.1, update to Java 17, drop xbib net dependency because we rely on JDK HTTP client, add custom URL builder

This commit is contained in:
Jörg Prante 2022-10-17 10:39:41 +02:00
parent f81b656f25
commit ea79e7afd6
20 changed files with 1621 additions and 33 deletions

View file

@ -4,7 +4,7 @@ plugins {
}
wrapper {
gradleVersion = "${project.property('gradle.wrapper.version')}"
gradleVersion = libs.versions.gradle.get()
distributionType = Wrapper.DistributionType.ALL
}

View file

@ -1,3 +1,5 @@
group = org.xbib
name = oai
version = 3.0.0
gradle.wrapper.version = 7.5.1

View file

@ -6,13 +6,13 @@ java {
}
compileJava {
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
}
compileTestJava {
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
}
jar {

View file

@ -1,12 +1,8 @@
def junitVersion = project.hasProperty('junit.version')?project.property('junit.version'):'5.8.2'
def hamcrestVersion = project.hasProperty('hamcrest.version')?project.property('hamcrest.version'):'2.2'
dependencies {
testImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}"
testImplementation "org.junit.jupiter:junit-jupiter-params:${junitVersion}"
testImplementation "org.hamcrest:hamcrest-library:${hamcrestVersion}"
testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${junitVersion}"
testImplementation libs.junit.jupiter.api
testImplementation libs.junit.jupiter.params
testImplementation libs.hamcrest
testRuntimeOnly libs.junit.jupiter.engine
}
test {

Binary file not shown.

View file

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-all.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

6
gradlew vendored
View file

@ -205,6 +205,12 @@ set -- \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.

10
gradlew.bat vendored
View file

@ -40,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute
if %ERRORLEVEL% equ 0 goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
@ -75,13 +75,15 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal

View file

@ -1,5 +1,5 @@
dependencies {
api project(':oai-common')
testImplementation "org.xbib:marc:${project.property('xbib-marc.version')}"
testImplementation "org.xbib:bibliographic-character-sets:${project.property('xbib-bibliographic-character-sets.version')}"
testImplementation libs.marc
testImplementation libs.charactersets
}

View file

@ -7,7 +7,7 @@ module org.xbib.oai.client {
exports org.xbib.oai.client.listrecords;
exports org.xbib.oai.client.listsets;
requires org.xbib.oai;
requires org.xbib.net.url;
//requires org.xbib.net;
requires org.xbib.content.xml;
requires java.xml;
requires java.logging;

View file

@ -1,11 +1,11 @@
package org.xbib.oai.client;
import org.xbib.net.URL;
import org.xbib.oai.OAIConstants;
import org.xbib.oai.client.identify.IdentifyRequest;
import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.client.listrecords.ListRecordsRequest;
import org.xbib.oai.client.listrecords.ListRecordsResponse;
import org.xbib.oai.client.util.UrlBuilder;
import org.xbib.oai.exceptions.NoRecordsMatchException;
import org.xbib.oai.util.ResumptionToken;
import org.xbib.oai.xml.MetadataHandler;
@ -73,7 +73,7 @@ public class OAIClient {
public IdentifyResponse identify() throws IOException, InterruptedException {
IdentifyRequest identifyRequest = new IdentifyRequest();
IdentifyResponse identifyResponse = new IdentifyResponse();
URL.Builder url = URL.from(baseURL).mutator();
UrlBuilder url = UrlBuilder.fromUrl(baseURL);
identifyRequest.getParams().forEach(url::queryParam);
HttpRequest httpRequest = HttpRequest.newBuilder()
.uri(URI.create(url.build().toExternalForm()))
@ -180,7 +180,7 @@ public class OAIClient {
listRecordsRequest.addHandler(handler);
}
ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
URL.Builder url = URL.from(baseURL).mutator();
UrlBuilder url = UrlBuilder.fromUrl(baseURL);
// kind of hacky here - suppress all OAI params if resumption token is present
if (listRecordsRequest.getResumptionToken() == null) {
listRecordsRequest.getParams().forEach(url::queryParam);
@ -188,6 +188,7 @@ public class OAIClient {
url.queryParam(OAIConstants.VERB_PARAMETER, OAIConstants.LIST_RECORDS);
url.queryParam(OAIConstants.RESUMPTION_TOKEN_PARAMETER, listRecordsRequest.getResumptionToken().toString());
}
URI uri = URI.create(url.build().toExternalForm());
HttpRequest httpRequest = HttpRequest.newBuilder()
.uri(uri)

View file

@ -0,0 +1,196 @@
package org.xbib.oai.client.util;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import static java.nio.charset.CoderResult.OVERFLOW;
import static java.nio.charset.CoderResult.UNDERFLOW;
/**
 * Decodes percent-encoded (%XX) Unicode text.
 *
 * <p>An instance reuses its internal buffers and its {@link CharsetDecoder} across calls and performs no
 * synchronization, so it must not be shared between threads without external locking.
 */
public final class PercentDecoder {

    /**
     * Bytes represented by the current sequence of %-triples. Resized as needed.
     */
    private ByteBuffer encodedBuf;

    /**
     * Written to with decoded chars by decoder.
     */
    private final CharBuffer decodedCharBuf;

    private final CharsetDecoder decoder;

    /**
     * The decoded string for the current input.
     */
    private final StringBuilder outputBuf = new StringBuilder();

    /**
     * Construct a new PercentDecoder with default buffer sizes.
     *
     * @param charsetDecoder Charset to decode bytes into chars with
     * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int)
     */
    public PercentDecoder(CharsetDecoder charsetDecoder) {
        this(charsetDecoder, 16, 16);
    }

    /**
     * @param charsetDecoder            Charset to decode bytes into chars with
     * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes
     * @param decodedCharBufSize        Size of buffer that encoded bytes are decoded into; values below 2 are
     *                                  raised to 2
     */
    public PercentDecoder(CharsetDecoder charsetDecoder, int initialEncodedByteBufSize,
                          int decodedCharBufSize) {
        encodedBuf = ByteBuffer.allocate(initialEncodedByteBufSize);
        // A supplementary code point is emitted as two chars at once. With room for fewer than two chars the
        // decoder would keep reporting OVERFLOW without consuming input and handleEncodedBytes() would never end.
        decodedCharBuf = CharBuffer.allocate(Math.max(decodedCharBufSize, 2));
        decoder = charsetDecoder;
    }

    /**
     * @param input Input with %-encoded representation of characters in this instance's configured character set,
     *              e.g. "%20" for a space character
     * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters
     * @throws IllegalArgumentException     if a '%' is not followed by two ASCII hex digits
     * @throws MalformedInputException      if decoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String decode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        outputBuf.setLength(0);
        // this is almost always an underestimate of the size needed:
        // only a 4-byte encoding (which is 12 characters input) would cause this to be an overestimate
        outputBuf.ensureCapacity(input.length() / 8);
        encodedBuf.clear();
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c != '%') {
                // a literal char ends the current run of %-triples: decode what was collected so far
                handleEncodedBytes();
                outputBuf.append(c);
                continue;
            }
            if (i + 2 >= input.length()) {
                throw new IllegalArgumentException(
                        "Could not percent decode <" + input + ">: incomplete %-pair at position " + i);
            }
            // grow the byte buf if needed (at least to 1, so that a zero-sized initial buffer can grow too)
            if (encodedBuf.remaining() == 0) {
                ByteBuffer largerBuf = ByteBuffer.allocate(Math.max(1, encodedBuf.capacity() * 2));
                encodedBuf.flip();
                largerBuf.put(encodedBuf);
                encodedBuf = largerBuf;
            }
            // note that we advance i here as we consume chars
            int msBits = hexValue(input.charAt(++i));
            int lsBits = hexValue(input.charAt(++i));
            if (msBits == -1 || lsBits == -1) {
                throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">");
            }
            msBits <<= 4;
            msBits |= lsBits;
            // msBits can only have 8 bits set, so cast is safe
            encodedBuf.put((byte) msBits);
        }
        handleEncodedBytes();
        return outputBuf.toString();
    }

    /**
     * Parse a single ASCII hex digit. Unlike {@link Character#digit(char, int)} this rejects non-ASCII digits and
     * letters (for example fullwidth forms), which are not valid in a percent escape.
     *
     * @param c char to parse
     * @return the value 0-15, or -1 if c is not one of 0-9, a-f, A-F
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Decode any buffered encoded bytes and write them to the output buf.
     */
    private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException {
        if (encodedBuf.position() == 0) {
            // nothing to do
            return;
        }
        decoder.reset();
        CoderResult coderResult;
        // switch to reading mode
        encodedBuf.flip();
        // loop while we're filling up the decoded char buf, or there's any encoded bytes
        // decode() in practice seems to only consume bytes when it can decode an entire char...
        do {
            decodedCharBuf.clear();
            coderResult = decoder.decode(encodedBuf, decodedCharBuf, false);
            throwIfError(coderResult);
            appendDecodedChars();
        } while (coderResult == OVERFLOW && encodedBuf.hasRemaining());
        // final decode with end-of-input flag
        decodedCharBuf.clear();
        coderResult = decoder.decode(encodedBuf, decodedCharBuf, true);
        throwIfError(coderResult);
        if (encodedBuf.hasRemaining()) {
            throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes");
        }
        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult);
        }
        appendDecodedChars();
        // we've finished the input, wrap it up
        encodedBuf.clear();
        flush();
    }

    /**
     * Must only be called when the input encoded bytes buffer is empty.
     */
    private void flush() throws MalformedInputException, UnmappableCharacterException {
        CoderResult coderResult;
        decodedCharBuf.clear();
        coderResult = decoder.flush(decodedCharBuf);
        appendDecodedChars();
        throwIfError(coderResult);
        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Decoder flush resulted in " + coderResult);
        }
    }

    /**
     * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding
     * CharacterCodingException.
     *
     * @param coderResult result to check
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException {
        if (coderResult.isMalformed()) {
            throw new MalformedInputException(coderResult.length());
        }
        if (coderResult.isUnmappable()) {
            throw new UnmappableCharacterException(coderResult.length());
        }
    }

    /**
     * Flip the decoded char buf and append it to the string buffer.
     */
    private void appendDecodedChars() {
        decodedCharBuf.flip();
        outputBuf.append(decodedCharBuf);
    }
}

View file

@ -0,0 +1,187 @@
package org.xbib.oai.client.util;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import java.util.BitSet;
import static java.lang.Character.isHighSurrogate;
import static java.lang.Character.isLowSurrogate;
/**
 * Encodes unsafe characters as a sequence of %XX hex-encoded bytes.
 *
 * This is typically done when encoding components of URLs. See {@link UrlPercentEncoders} for pre-configured
 * PercentEncoder instances.
 *
 * <p>An instance reuses its internal buffers and its {@link CharsetEncoder} across calls and performs no
 * synchronization, so it must not be shared between threads without external locking.
 */
public final class PercentEncoder {

    private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray();

    private final BitSet safeChars;

    private final CharsetEncoder encoder;

    /**
     * Pre-allocate a string handler to make the common case of encoding to a string faster.
     */
    private final StringBuilderPercentEncoderOutputHandler stringHandler = new StringBuilderPercentEncoderOutputHandler();

    /**
     * Receives the encoded bytes of the one or two chars currently being escaped.
     */
    private final ByteBuffer encodedBytes;

    /**
     * Holds the single unsafe char, or the surrogate pair, currently being escaped.
     */
    private final CharBuffer unsafeCharsToEncode;

    /**
     * @param safeChars      the set of chars to NOT encode, stored as a bitset with the int positions corresponding to
     *                       those chars set to true. Treated as read only.
     * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder instances
     *                       across threads.
     */
    public PercentEncoder(BitSet safeChars, CharsetEncoder charsetEncoder) {
        this.safeChars = safeChars;
        this.encoder = charsetEncoder;
        // maxBytesPerChar() is a float; truncate it and add one to stay on the safe side
        int maxBytesPerChar = 1 + (int) encoder.maxBytesPerChar();
        // need to handle surrogate pairs, so need to be able to handle 2 chars worth of stuff at once
        encodedBytes = ByteBuffer.allocate(maxBytesPerChar * 2);
        unsafeCharsToEncode = CharBuffer.allocate(2);
    }

    /**
     * Encode the input and pass output chars to a handler.
     *
     * @param input   input string
     * @param handler handler to call on each output character
     * @throws IllegalArgumentException     if a high surrogate is not followed by a low surrogate
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public void encode(CharSequence input, StringBuilderPercentEncoderOutputHandler handler) throws
            MalformedInputException, UnmappableCharacterException {
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (safeChars.get(c)) {
                handler.onOutputChar(c);
                continue;
            }
            // not a safe char
            unsafeCharsToEncode.clear();
            unsafeCharsToEncode.append(c);
            if (isHighSurrogate(c)) {
                if (input.length() > i + 1) {
                    // get the low surrogate as well
                    char lowSurrogate = input.charAt(i + 1);
                    if (isLowSurrogate(lowSurrogate)) {
                        unsafeCharsToEncode.append(lowSurrogate);
                        // the pair is encoded as one unit, skip its second half
                        i++;
                    } else {
                        throw new IllegalArgumentException(
                                "Invalid UTF-16: Char " + (i) + " is a high surrogate (\\u" + Integer
                                        .toHexString(c) + "), but char " + (i + 1) + " is not a low surrogate (\\u" + Integer
                                        .toHexString(lowSurrogate) + ")");
                    }
                } else {
                    throw new IllegalArgumentException(
                            "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer
                                    .toHexString(c) + ")");
                }
            }
            flushUnsafeCharBuffer(handler);
        }
    }

    /**
     * Encode the input and return the resulting text as a String.
     *
     * @param input input string
     * @return the input string with every character that's not in safeChars turned into its byte representation via the
     * instance's encoder and then percent-encoded
     * @throws IllegalArgumentException     if a high surrogate is not followed by a low surrogate
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String encode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        stringHandler.reset();
        stringHandler.ensureCapacity(input.length());
        encode(input, stringHandler);
        return stringHandler.getContents();
    }

    /**
     * Encode unsafeCharsToEncode to bytes as per charsetEncoder, then percent-encode those bytes into output.
     *
     * Side effects: unsafeCharsToEncode will be read from and cleared. encodedBytes will be cleared and written to.
     *
     * @param handler receives the '%', high-nibble and low-nibble chars for every encoded byte
     */
    private void flushUnsafeCharBuffer(StringBuilderPercentEncoderOutputHandler handler) throws MalformedInputException,
            UnmappableCharacterException {
        // need to read from the char buffer, which was most recently written to
        unsafeCharsToEncode.flip();
        encodedBytes.clear();
        encoder.reset();
        CoderResult result = encoder.encode(unsafeCharsToEncode, encodedBytes, true);
        checkResult(result);
        result = encoder.flush(encodedBytes);
        checkResult(result);
        // read contents of bytebuffer
        encodedBytes.flip();
        while (encodedBytes.hasRemaining()) {
            byte b = encodedBytes.get();
            handler.onOutputChar('%');
            // the mask also strips the sign extension of negative bytes
            handler.onOutputChar(HEX_CODE[b >> 4 & 0xF]);
            handler.onOutputChar(HEX_CODE[b & 0xF]);
        }
    }

    /**
     * @param result result to check
     * @throws IllegalStateException        if result is overflow
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private static void checkResult(CoderResult result) throws MalformedInputException, UnmappableCharacterException {
        if (result.isOverflow()) {
            throw new IllegalStateException("Byte buffer overflow; this should not happen.");
        }
        if (result.isMalformed()) {
            throw new MalformedInputException(result.length());
        }
        if (result.isUnmappable()) {
            throw new UnmappableCharacterException(result.length());
        }
    }

    /**
     * Output handler that collects the emitted chars in a reusable {@link StringBuilder}.
     *
     * Declared static because it uses no state of the enclosing encoder.
     */
    private static final class StringBuilderPercentEncoderOutputHandler {

        private final StringBuilder stringBuilder;

        StringBuilderPercentEncoderOutputHandler() {
            stringBuilder = new StringBuilder();
        }

        /**
         * @return the chars collected since the last {@link #reset()}
         */
        String getContents() {
            return stringBuilder.toString();
        }

        /**
         * Discard all collected chars.
         */
        void reset() {
            stringBuilder.setLength(0);
        }

        /**
         * @param length minimum capacity the underlying builder should have
         */
        void ensureCapacity(int length) {
            stringBuilder.ensureCapacity(length);
        }

        /**
         * @param c next output char to append
         */
        void onOutputChar(char c) {
            stringBuilder.append(c);
        }
    }
}

View file

@ -0,0 +1,503 @@
package org.xbib.oai.client.util;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import static org.xbib.oai.client.util.UrlPercentEncoders.getFragmentEncoder;
import static org.xbib.oai.client.util.UrlPercentEncoders.getMatrixEncoder;
import static org.xbib.oai.client.util.UrlPercentEncoders.getPathEncoder;
import static org.xbib.oai.client.util.UrlPercentEncoders.getQueryParamEncoder;
import static org.xbib.oai.client.util.UrlPercentEncoders.getRegNameEncoder;
import static org.xbib.oai.client.util.UrlPercentEncoders.getUnstructuredQueryEncoder;
/**
* Builder for urls with url-encoding applied to path, query param, etc.
*
* Escaping rules are from RFC 3986, RFC 1738 and the <a href ="http://www.w3.org/TR/html401/interact/forms.html#form-content-type">HTML 4 spec</a>
* This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make
* HTTP-useful URLs.
*/
public final class UrlBuilder {
/**
* IPv6 address, cribbed from <a href="http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings">StackOverflow</a>
*/
private static final Pattern IPV6_PATTERN = Pattern
.compile(
"\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z");
/**
* IPv4 dotted quad
*/
private static final Pattern IPV4_PATTERN = Pattern
.compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z");
private final String scheme;
private final String host;
private final Integer port;
private final List<Pair<String, String>> queryParams = new ArrayList<>();
/**
* If this is non-null, queryParams must be empty, and vice versa.
*/
private String unstructuredQuery;
private final List<PathSegment> pathSegments = new ArrayList<>();
private final PercentEncoder pathEncoder = getPathEncoder();
private final PercentEncoder regNameEncoder = getRegNameEncoder();
private final PercentEncoder matrixEncoder = getMatrixEncoder();
private final PercentEncoder queryParamEncoder = getQueryParamEncoder();
private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder();
private final PercentEncoder fragmentEncoder = getFragmentEncoder();
private String fragment;
private boolean forceTrailingSlash = false;
/**
* Create a URL with UTF-8 encoding.
*
* @param scheme scheme (e.g. http)
* @param host host (e.g. foo.com or 1.2.3.4 or [::1])
* @param port null or a positive integer
*/
private UrlBuilder(String scheme, String host, Integer port) {
this.host = host;
this.scheme = scheme;
this.port = port;
}
/**
* Create a URL with an null port and UTF-8 encoding.
*
* @param scheme scheme (e.g. http)
* @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
* ([::1]), excluding IPvFuture since no one uses that in practice
* @return a url builder
* @see UrlBuilder#forHost(String scheme, String host, int port)
*/
public static UrlBuilder forHost(String scheme, String host) {
return new UrlBuilder(scheme, host, null);
}
/**
* @param scheme scheme (e.g. http)
* @param host host in any of the valid syntaxes: reg-name ( a dns name), ipv4 literal (1.2.3.4), ipv6 literal
* ([::1]), excluding IPvFuture since no one uses that in practice
* @param port port
* @return a url builder
*/
public static UrlBuilder forHost(String scheme, String host, int port) {
return new UrlBuilder(scheme, host, port);
}
public static UrlBuilder fromUrl(String urlSpec) throws CharacterCodingException, MalformedURLException {
return fromUrl(new URL(urlSpec));
}
/**
* Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the
* query string apply.
*
* @param url url to initialize builder with
* @return a UrlBuilder containing the host, path, etc. from the url
* @throws CharacterCodingException if char decoding fails
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
*/
public static UrlBuilder fromUrl(URL url) throws CharacterCodingException {
return fromUrl(url, StandardCharsets.UTF_8.newDecoder());
}
/**
* Create a UrlBuilder initialized with the contents of a {@link URL}.
*
* The query string will be parsed into HTML4 query params if it can be separated into a
* <code>&amp;</code>-separated sequence of <code>key=value</code> pairs. The sequence of query params can then be
* appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is
* only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that
* is in some other form. To represent this case, if the aforementioned param-parsing attempt fails, the query
* string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link
* UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls
* to {@link UrlBuilder#unstructuredQuery(String)}}, which replaces the entire query string, are allowed.
*
* @param url url to initialize builder with
* @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8)
* @return a UrlBuilder containing the host, path, etc. from the url
* @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to
* report errors
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
*/
public static UrlBuilder fromUrl(URL url, CharsetDecoder charsetDecoder) throws
CharacterCodingException {
PercentDecoder decoder = new PercentDecoder(charsetDecoder);
// reg names must be encoded UTF-8
PercentDecoder regNameDecoder;
if (charsetDecoder.charset().equals(StandardCharsets.UTF_8)) {
regNameDecoder = decoder;
} else {
regNameDecoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder());
}
Integer port = url.getPort();
if (port == -1) {
port = null;
}
UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port);
buildFromPath(builder, decoder, url);
buildFromQuery(builder, decoder, url);
if (url.getRef() != null) {
builder.fragment(decoder.decode(url.getRef()));
}
return builder;
}
/**
* Add a path segment.
*
* @param segment a path segment
* @return this
*/
public UrlBuilder pathSegment(String segment) {
pathSegments.add(new PathSegment(segment));
return this;
}
/**
* Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}.
*
* @param segments path segments
* @return this
*/
public UrlBuilder pathSegments(String... segments) {
for (String segment : segments) {
pathSegment(segment);
}
return this;
}
/**
* Add an HTML query parameter. Query parameters will be encoded in the order added.
*
* Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by
* <a href="http://www.w3.org/TR/html401/interact/forms.html#form-content-type">HTML 4</a>.
*
* If you use this method to build a query string, or created this builder from a url with a query string that can
* successfully be parsed into query param pairs, you cannot subsequently use {@link
* UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}.
*
* @param name param name
* @param value param value
* @return this
*/
public UrlBuilder queryParam(String name, String value) {
if (unstructuredQuery != null) {
throw new IllegalStateException(
"Cannot call queryParam() when this already has an unstructured query specified");
}
queryParams.add(Pair.of(name, value));
return this;
}
/**
* Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that
* is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite
* that query.
*
* If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you
* cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL,
* CharsetDecoder)}.
*
* @param query Complete URI query, as specified by <a href="https://tools.ietf.org/html/rfc3986#section-3.4">RFC 3986</a>.
* @return this
*/
public UrlBuilder unstructuredQuery(String query) {
if (!queryParams.isEmpty()) {
throw new IllegalStateException(
"Cannot call unstructuredQuery() when this already has queryParam pairs specified");
}
unstructuredQuery = query;
return this;
}
/**
* Clear the unstructured query and any query params.
*
* Since the query / query param situation is a little complicated, this method will let you remove all query
* information and start again from scratch. This may be useful when taking an existing url, parsing it into a
* builder, and then re-doing its query params, for instance.
*
* @return this
*/
public UrlBuilder clearQuery() {
queryParams.clear();
unstructuredQuery = null;
return this;
}
/**
* Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the
* root. Matrix params will be encoded in the order added.
*
* @param name param name
* @param value param value
* @return this
*/
public UrlBuilder matrixParam(String name, String value) {
if (pathSegments.isEmpty()) {
// create an empty path segment to represent a matrix param applied to the root
pathSegment("");
}
PathSegment seg = pathSegments.get(pathSegments.size() - 1);
seg.matrixParams.add(Pair.of(name, value));
return this;
}
/**
* Set the fragment.
*
* @param fragment fragment string
* @return this
*/
public UrlBuilder fragment(String fragment) {
this.fragment = fragment;
return this;
}
/**
* Force the generated URL to have a trailing slash at the end of the path.
*
* @return this
*/
public UrlBuilder forceTrailingSlash() {
forceTrailingSlash = true;
return this;
}
public URL build() throws CharacterCodingException, MalformedURLException {
return new URL(toUrlString());
}
/**
* Encode the current builder state into a URL string.
*
* @return a well-formed URL string
* @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
*/
public String toUrlString() throws CharacterCodingException {
StringBuilder buf = new StringBuilder();
buf.append(scheme);
buf.append("://");
buf.append(encodeHost(host));
if (port != null) {
buf.append(':');
buf.append(port);
}
for (PathSegment pathSegment : pathSegments) {
buf.append('/');
buf.append(pathEncoder.encode(pathSegment.segment));
for (Pair<String, String> matrixParam : pathSegment.matrixParams) {
buf.append(';');
buf.append(matrixEncoder.encode(matrixParam.getKey()));
buf.append('=');
buf.append(matrixEncoder.encode(matrixParam.getValue()));
}
}
if (forceTrailingSlash) {
buf.append('/');
}
if (!queryParams.isEmpty()) {
buf.append("?");
Iterator<Pair<String, String>> qpIter = queryParams.iterator();
while (qpIter.hasNext()) {
Pair<String, String> queryParam = qpIter.next();
buf.append(queryParamEncoder.encode(queryParam.getKey()));
buf.append('=');
buf.append(queryParamEncoder.encode(queryParam.getValue()));
if (qpIter.hasNext()) {
buf.append('&');
}
}
} else if (unstructuredQuery != null) {
buf.append("?");
buf.append(unstructuredQueryEncoder.encode(unstructuredQuery));
}
if (fragment != null) {
buf.append('#');
buf.append(fragmentEncoder.encode(fragment));
}
return buf.toString();
}
/**
* Populate a url builder based on the query of a url
*
* @param builder builder
* @param decoder decoder
* @param url url
* @throws CharacterCodingException
*/
private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws
CharacterCodingException {
if (url.getQuery() != null) {
String q = url.getQuery();
// try to parse into &-separated key=value pairs
List<Pair<String, String>> pairs = new ArrayList<>();
boolean parseOk = true;
for (String queryChunk : q.split("&")) {
String[] queryParamChunks = queryChunk.split("=");
if (queryParamChunks.length != 2) {
parseOk = false;
break;
}
pairs.add(Pair.of(decoder.decode(queryParamChunks[0]),
decoder.decode(queryParamChunks[1])));
}
if (parseOk) {
for (Pair<String, String> pair : pairs) {
builder.queryParam(pair.getKey(), pair.getValue());
}
} else {
builder.unstructuredQuery(decoder.decode(q));
}
}
}
/**
* Populate the path segments of a url builder from a url
*
* @param builder builder
* @param decoder decoder
* @param url url
* @throws CharacterCodingException
*/
private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws
CharacterCodingException {
for (String pathChunk : url.getPath().split("/")) {
if (pathChunk.equals("")) {
continue;
}
if (pathChunk.charAt(0) == ';') {
builder.pathSegment("");
// empty path segment, but matrix params
for (String matrixChunk : pathChunk.substring(1).split(";")) {
buildFromMatrixParamChunk(decoder, builder, matrixChunk);
}
continue;
}
// otherwise, path chunk is non empty and does not start with a ';'
String[] matrixChunks = pathChunk.split(";");
// first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will
// not be included in the final url.
builder.pathSegment(decoder.decode(matrixChunks[0]));
// if there any other chunks, they're matrix param pairs
for (int i = 1; i < matrixChunks.length; i++) {
buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]);
}
}
}
private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws
CharacterCodingException {
String[] mtxPair = pathMatrixChunk.split("=");
if (mtxPair.length != 2) {
throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">");
}
String mtxName = mtxPair[0];
String mtxVal = mtxPair[1];
ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal));
}
/**
* @param host original host string
* @return host encoded as in RFC 3986 section 3.2.2
*/
private String encodeHost(String host) throws CharacterCodingException {
// matching order: IP-literal, IPv4, reg-name
if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) {
return host;
}
// it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL)
return regNameEncoder.encode(host);
}
/**
 * A single path segment together with the matrix params attached to it.
 */
private static class PathSegment {

    /** Segment text. */
    private final String segment;

    /** Name/value matrix pairs belonging to this segment; starts out empty. */
    private final List<Pair<String, String>> matrixParams;

    PathSegment(String segment) {
        this.segment = segment;
        this.matrixParams = new ArrayList<>();
    }
}
/**
 * Minimal generic holder for a key and its value.
 *
 * @param <K> key type
 * @param <V> value type
 */
static class Pair<K, V> {

    K key;

    V value;

    Pair(K k, V v) {
        key = k;
        value = v;
    }

    /**
     * Static factory, equivalent to calling the constructor.
     *
     * @param key   the key
     * @param value the value
     * @param <K>   key type
     * @param <V>   value type
     * @return a new pair holding the given key and value
     */
    static <K, V> Pair<K, V> of(K key, V value) {
        Pair<K, V> pair = new Pair<K, V>(key, value);
        return pair;
    }

    K getKey() {
        return this.key;
    }

    V getValue() {
        return this.value;
    }
}
}

View file

@ -0,0 +1,164 @@
package org.xbib.oai.client.util;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import static java.nio.charset.CodingErrorAction.REPLACE;
/**
 * Factory for {@link PercentEncoder} instances, one flavour per URL component (reg-name, path segment,
 * matrix param, unstructured query, query param and fragment). Every encoder is backed by a UTF-8
 * charset encoder that replaces malformed or unmappable input.
 *
 * See RFC 3986, RFC 1738 and http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding.
 */
public final class UrlPercentEncoders {

    /** Characters handed to the encoder for RFC 3986 reg-names. */
    private static final BitSet REG_NAME_BIT_SET = new BitSet();

    /** Characters handed to the encoder for path segments. */
    private static final BitSet PATH_BIT_SET = new BitSet();

    /** Characters handed to the encoder for matrix param names and values. */
    private static final BitSet MATRIX_BIT_SET = new BitSet();

    /** Characters handed to the encoder for a free-form query string. */
    private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet();

    /** Characters handed to the encoder for query param keys and values. */
    private static final BitSet QUERY_PARAM_BIT_SET = new BitSet();

    /** Characters handed to the encoder for the fragment. */
    private static final BitSet FRAGMENT_BIT_SET = new BitSet();

    static {
        // RFC 3986 'reg-name'. This is deliberately lenient: DNS-illegal names can pass, but the result
        // is at least URI-compliant even if it is not HTTP URL-compliant.
        addUnreserved(REG_NAME_BIT_SET);
        addSubdelims(REG_NAME_BIT_SET);

        // Path segment: RFC 3986 'pchar' minus ';', the delimiter that starts the matrix section.
        addPChar(PATH_BIT_SET);
        PATH_BIT_SET.clear(';');

        // Matrix params (RFC 1738 S3.3): same as a path segment, and '=' is taken out as well because it
        // separates name from value. '/' and '?' are not in 'pchar' to begin with.
        MATRIX_BIT_SET.or(PATH_BIT_SET);
        MATRIX_BIT_SET.clear('=');

        // RFC 3986 'query', except that http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 lets
        // '+' stand for a space in a query, so '+' is not safe to leave as-is.
        addQuery(UNSTRUCTURED_QUERY_BIT_SET);
        UNSTRUCTURED_QUERY_BIT_SET.clear('+');

        // HTML4 key=value queries are stricter still: the pair delimiters must not stay as-is either.
        QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET);
        QUERY_PARAM_BIT_SET.clear('=');
        QUERY_PARAM_BIT_SET.clear('&');

        addFragment(FRAGMENT_BIT_SET);
    }

    private UrlPercentEncoders() {
    }

    public static PercentEncoder getRegNameEncoder() {
        return utf8Encoder(REG_NAME_BIT_SET);
    }

    public static PercentEncoder getPathEncoder() {
        return utf8Encoder(PATH_BIT_SET);
    }

    public static PercentEncoder getMatrixEncoder() {
        return utf8Encoder(MATRIX_BIT_SET);
    }

    public static PercentEncoder getUnstructuredQueryEncoder() {
        return utf8Encoder(UNSTRUCTURED_QUERY_BIT_SET);
    }

    public static PercentEncoder getQueryParamEncoder() {
        return utf8Encoder(QUERY_PARAM_BIT_SET);
    }

    public static PercentEncoder getFragmentEncoder() {
        return utf8Encoder(FRAGMENT_BIT_SET);
    }

    /**
     * Create a new percent encoder over the given bit set with a fresh UTF-8 charset encoder that
     * replaces malformed and unmappable input.
     *
     * @param chars bit set passed to the percent encoder
     * @return a new encoder instance
     */
    private static PercentEncoder utf8Encoder(BitSet chars) {
        return new PercentEncoder(chars,
                StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE).onUnmappableCharacter(REPLACE));
    }

    /**
     * Add code points for 'fragment' chars: 'pchar' plus '/' and '?'.
     *
     * @param fragmentBitSet bit set
     */
    private static void addFragment(BitSet fragmentBitSet) {
        addPChar(fragmentBitSet);
        for (char c : "/?".toCharArray()) {
            fragmentBitSet.set(c);
        }
    }

    /**
     * Add code points for 'query' chars: 'pchar' plus '/' and '?'.
     *
     * @param queryBitSet bit set
     */
    private static void addQuery(BitSet queryBitSet) {
        addPChar(queryBitSet);
        for (char c : "/?".toCharArray()) {
            queryBitSet.set(c);
        }
    }

    /**
     * Add code points for 'pchar' chars: unreserved, sub-delims, ':' and '@'.
     *
     * @param bs bitset
     */
    private static void addPChar(BitSet bs) {
        addUnreserved(bs);
        addSubdelims(bs);
        for (char c : ":@".toCharArray()) {
            bs.set(c);
        }
    }

    /**
     * Add codepoints for 'unreserved' chars: ASCII letters, digits, '-', '.', '_' and '~'.
     *
     * @param bs bitset to add codepoints to
     */
    private static void addUnreserved(BitSet bs) {
        // BitSet.set(from, to) excludes the upper bound, hence the + 1
        bs.set('a', 'z' + 1);
        bs.set('A', 'Z' + 1);
        bs.set('0', '9' + 1);
        for (char c : "-._~".toCharArray()) {
            bs.set(c);
        }
    }

    /**
     * Add codepoints for 'sub-delims' chars
     *
     * @param bs bitset to add codepoints to
     */
    private static void addSubdelims(BitSet bs) {
        for (char c : "!$&'()*+,;=".toCharArray()) {
            bs.set(c);
        }
    }
}

View file

@ -23,13 +23,15 @@ class DOAJClientTest {
IdentifyResponse identifyResponse = oaiClient.identify();
String granularity = identifyResponse.getGranularity();
logger.log(Level.INFO, "granularity = " + granularity);
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null;
// override granularity because of "bad arguments" error. Seems DOAJ is unable to manage its own declared granularity.
DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT"));
Handler handler = new Handler();
SplitWriter splitWriter = new SplitWriter("build/doaj-%d.xml", -1, 8192, false);
oaiClient.setSplitWriter(splitWriter);
Instant to = Instant.now();
Instant from = to.atZone(ZoneId.systemDefault()).minusMonths(1).toInstant();
oaiClient.listRecords("oai_dc", null,
dateTimeFormatter,Instant.parse("2021-05-01T00:00:00Z"), Instant.parse("2021-06-01T00:00:00Z"), null,
dateTimeFormatter, from, to, null,
handler, null);
logger.log(Level.INFO, "count = " + handler.count());
assertTrue(handler.count() > 0);

View file

@ -0,0 +1,84 @@
package org.xbib.oai.client.util;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnmappableCharacterException;
import java.util.BitSet;
import static java.nio.charset.CodingErrorAction.REPLACE;
import static org.junit.jupiter.api.Assertions.assertEquals;
public final class PercentEncoderTest {

    /** Leaves ASCII letters and digits alone, encodes everything else as UTF-8 bytes. */
    private static PercentEncoder alnum;

    /** Same safe set as {@link #alnum}, but encodes everything else as UTF-16BE bytes. */
    private static PercentEncoder alnum16;

    @BeforeAll
    public static void setUp() {
        BitSet safe = new BitSet();
        // upper bound of BitSet.set(from, to) is exclusive
        safe.set('a', 'z' + 1);
        safe.set('A', 'Z' + 1);
        safe.set('0', '9' + 1);
        alnum = new PercentEncoder(safe,
                StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE).onUnmappableCharacter(REPLACE));
        alnum16 = new PercentEncoder(safe,
                StandardCharsets.UTF_16BE.newEncoder().onMalformedInput(REPLACE).onUnmappableCharacter(REPLACE));
    }

    @Test
    public void testDoesntEncodeSafe() throws CharacterCodingException {
        // only lower-case letters are safe here, so the upper-case half must come out escaped
        BitSet lowerCase = new BitSet();
        lowerCase.set('a', 'z' + 1);
        PercentEncoder lowerOnly = new PercentEncoder(lowerCase,
                StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE).onUnmappableCharacter(REPLACE));
        String encoded = lowerOnly.encode("abcdABCD");
        assertEquals("abcd%41%42%43%44", encoded);
    }

    @Test
    public void testEncodeInBetweenSafe() throws MalformedInputException, UnmappableCharacterException {
        String encoded = alnum.encode("abc 123");
        assertEquals("abc%20123", encoded);
    }

    @Test
    public void testSafeInBetweenEncoded() throws MalformedInputException, UnmappableCharacterException {
        String encoded = alnum.encode(" abc ");
        assertEquals("%20abc%20", encoded);
    }

    @Test
    public void testEncodeUtf8() throws CharacterCodingException {
        // unicode snowman: a single UTF-16 char
        String encoded = alnum.encode("snowman\u2603");
        assertEquals("snowman%E2%98%83", encoded);
    }

    @Test
    public void testEncodeUtf8SurrogatePair() throws CharacterCodingException {
        // musical G clef (1d11e) needs a surrogate pair in a Java string
        String encoded = alnum.encode("clef\ud834\udd1e");
        assertEquals("clef%F0%9D%84%9E", encoded);
    }

    @Test
    public void testEncodeUtf16() throws CharacterCodingException {
        // unicode snowman: a single UTF-16 char
        String encoded = alnum16.encode("snowman\u2603");
        assertEquals("snowman%26%03", encoded);
    }

    @Test
    public void testUrlEncodedUtf16SurrogatePair() throws CharacterCodingException {
        // musical G clef (1d11e) needs a surrogate pair in a Java string
        String encoded = alnum16.encode("clef\ud834\udd1e");
        assertEquals("clef%D8%34%DD%1E", encoded);
    }
}

View file

@ -0,0 +1,425 @@
package org.xbib.oai.client.util;
import org.junit.jupiter.api.Test;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
/**
 * Tests for {@link UrlBuilder}: assembling URL strings from individual parts (host, port, path segments,
 * matrix params, query, fragment) and re-emitting URLs parsed via {@code UrlBuilder.fromUrl}.
 */
public final class UrlBuilderTest {
// ---- building a URL from parts ----
@Test
public void testNoUrlParts() throws Exception {
assertUrlEquals("http://foo.com", UrlBuilder.forHost("http", "foo.com").toUrlString());
}
@Test
public void testWithPort() throws Exception {
assertUrlEquals("http://foo.com:33", UrlBuilder.forHost("http", "foo.com", 33).toUrlString());
}
@Test
public void testSimplePath() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.pathSegment("seg1").pathSegment("seg2");
assertUrlEquals("http://foo.com/seg1/seg2", ub.toUrlString());
}
@Test
public void testPathWithReserved() throws Exception {
// RFC 1738 S3.3
// '/', ';' and '?' are expected to be escaped inside a segment, '=' and '&' to stay literal
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.pathSegment("seg/;?ment").pathSegment("seg=&2");
assertUrlEquals("http://foo.com/seg%2F%3B%3Fment/seg=&2", ub.toUrlString());
}
@Test
public void testPathSegments() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.pathSegments("seg1", "seg2", "seg3");
assertUrlEquals("http://foo.com/seg1/seg2/seg3", ub.toUrlString());
}
@Test
public void testMatrixWithoutPathHasLeadingSlash() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.matrixParam("foo", "bar");
assertUrlEquals("http://foo.com/;foo=bar", ub.toUrlString());
}
@Test
public void testMatrixWithReserved() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com")
.pathSegment("foo")
.matrixParam("foo", "bar")
.matrixParam("res;=?#/erved", "value")
.pathSegment("baz");
assertUrlEquals("http://foo.com/foo;foo=bar;res%3B%3D%3F%23%2Ferved=value/baz", ub.toUrlString());
}
@Test
public void testUrlEncodedPathSegmentUtf8() throws Exception {
// 1 UTF-16 char
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.pathSegment("snowman").pathSegment("\u2603");
assertUrlEquals("http://foo.com/snowman/%E2%98%83", ub.toUrlString());
}
@Test
public void testUrlEncodedPathSegmentUtf8SurrogatePair() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
// musical G clef: 1d11e, has to be represented in surrogate pair form
ub.pathSegment("clef").pathSegment("\ud834\udd1e");
assertUrlEquals("http://foo.com/clef/%F0%9D%84%9E", ub.toUrlString());
}
@Test
public void testQueryParamNoPath() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.queryParam("foo", "bar");
String s = ub.toUrlString();
assertUrlEquals("http://foo.com?foo=bar", s);
}
@Test
public void testQueryParamsDuplicated() throws Exception {
// repeated keys are expected to be kept, in insertion order
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.queryParam("foo", "bar");
ub.queryParam("foo", "bar2");
ub.queryParam("baz", "quux");
ub.queryParam("baz", "quux2");
assertUrlEquals("http://foo.com?foo=bar&foo=bar2&baz=quux&baz=quux2", ub.toUrlString());
}
@Test
public void testEncodeQueryParams() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.queryParam("foo", "bar&=#baz");
ub.queryParam("foo", "bar?/2");
assertUrlEquals("http://foo.com?foo=bar%26%3D%23baz&foo=bar?/2", ub.toUrlString());
}
@Test
public void testEncodeQueryParamWithSpaceAndPlus() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.queryParam("foo", "spa ce");
ub.queryParam("fo+o", "plus+");
assertUrlEquals("http://foo.com?foo=spa%20ce&fo%2Bo=plus%2B", ub.toUrlString());
}
@Test
public void testPlusInVariousParts() throws Exception {
// '+' is expected to be escaped in the query param only; path, matrix param and fragment keep it
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.pathSegment("has+plus")
.matrixParam("plusMtx", "pl+us")
.queryParam("plusQp", "pl+us")
.fragment("plus+frag");
assertUrlEquals("http://foo.com/has+plus;plusMtx=pl+us?plusQp=pl%2Bus#plus+frag", ub.toUrlString());
}
@Test
public void testFragment() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com");
ub.queryParam("foo", "bar");
ub.fragment("#frag/?");
assertUrlEquals("http://foo.com?foo=bar#%23frag/?", ub.toUrlString());
}
@Test
public void testAllParts() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("https", "foo.bar.com", 3333);
ub.pathSegment("foo");
ub.pathSegment("bar");
ub.matrixParam("mtx1", "val1");
ub.matrixParam("mtx2", "val2");
ub.queryParam("q1", "v1");
ub.queryParam("q2", "v2");
ub.fragment("zomg it's a fragment");
assertEquals("https://foo.bar.com:3333/foo/bar;mtx1=val1;mtx2=val2?q1=v1&q2=v2#zomg%20it's%20a%20fragment",
ub.toUrlString());
}
// ---- host handling: IP literals versus reg-names ----
@Test
public void testIPv4Literal() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "127.0.0.1");
assertUrlEquals("http://127.0.0.1", ub.toUrlString());
}
@Test
public void testBadIPv4LiteralDoesntChoke() throws Exception {
// 300 is not a valid IPv4 octet; the host is still expected to come out unchanged
UrlBuilder ub = UrlBuilder.forHost("http", "300.100.50.1");
assertUrlEquals("http://300.100.50.1", ub.toUrlString());
}
@Test
public void testIPv6LiteralLocalhost() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "[::1]");
assertUrlEquals("http://[::1]", ub.toUrlString());
}
@Test
public void testIPv6Literal() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "[2001:db8:85a3::8a2e:370:7334]");
assertUrlEquals("http://[2001:db8:85a3::8a2e:370:7334]", ub.toUrlString());
}
@Test
public void testEncodedRegNameSingleByte() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "host?name;");
assertUrlEquals("http://host%3Fname;", ub.toUrlString());
}
@Test
public void testEncodedRegNameMultiByte() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "snow\u2603man");
assertUrlEquals("http://snow%E2%98%83man", ub.toUrlString());
}
// ---- forceTrailingSlash() ----
@Test
public void testForceTrailingSlash() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c");
assertUrlEquals("https://foo.com/a/b/c/", ub.toUrlString());
}
@Test
public void testForceTrailingSlashWithQueryParams() throws Exception {
UrlBuilder ub =
UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c").queryParam("foo", "bar");
assertUrlEquals("https://foo.com/a/b/c/?foo=bar", ub.toUrlString());
}
@Test
public void testForceTrailingSlashNoPathSegmentsWithMatrixParams() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("https", "foo.com").forceTrailingSlash().matrixParam("m1", "v1");
assertUrlEquals("https://foo.com/;m1=v1/", ub.toUrlString());
}
@Test
public void testIntermingledMatrixParamsAndPathSegments() throws Exception {
// a matrix param is expected to attach to the most recently added path segment
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com")
.pathSegments("seg1", "seg2")
.matrixParam("m1", "v1")
.pathSegment("seg3")
.matrixParam("m2", "v2");
assertUrlEquals("http://foo.com/seg1/seg2;m1=v1/seg3;m2=v2", ub.toUrlString());
}
// ---- fromUrl(): parse an existing URL and re-emit it ----
@Test
public void testFromUrlWithEverything() throws URISyntaxException, CharacterCodingException, MalformedURLException {
String orig =
"https://foo.bar.com:3333/foo/ba%20r;mtx1=val1;mtx2=val%202/seg%203;m2=v2?q1=v1&q2=v%202#zomg%20it's%20a%20fragment";
assertUrlBuilderRoundtrip(orig);
}
@Test
public void testFromUrlWithEmptyPath() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com");
}
@Test
public void testFromUrlWithEmptyPathAndSlash() throws URISyntaxException, CharacterCodingException, MalformedURLException {
// a lone trailing slash is expected to be dropped
assertUrlBuilderRoundtrip("http://foo.com/", "http://foo.com");
}
@Test
public void testFromUrlWithPort() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com:1234");
}
@Test
public void testFromUrlWithEmptyPathSegent() throws URISyntaxException, CharacterCodingException, MalformedURLException {
// empty trailing segments are expected to disappear
assertUrlBuilderRoundtrip("http://foo.com/foo//", "http://foo.com/foo");
}
@Test
public void testFromUrlWithEncodedHost() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://f%20oo.com/bar");
}
@Test
public void testFromUrlWithEncodedPathSegment() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/foo/b%20ar");
}
@Test
public void testFromUrlWithEncodedMatrixParam() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/foo;m1=v1;m%202=v%202");
}
@Test
public void testFromUrlWithEncodedQueryParam() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/foo?q%201=v%202&q2=v2");
}
@Test
public void testFromUrlWithEncodedQueryParamDelimiter() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=%3Dv1&%26q2=v2");
}
@Test
public void testFromUrlWithEncodedFragment() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/foo#b%20ar");
}
@Test
public void testFromUrlWithMalformedMatrixPair() throws MalformedURLException, CharacterCodingException {
// NOTE(review): fromUrl is called with a String here, while the round-trip helper below passes a
// java.net.URL — presumably both overloads exist; confirm against UrlBuilder.
try {
UrlBuilder.fromUrl("http://foo.com/foo;m1=v1=v2");
fail();
} catch (IllegalArgumentException e) {
assertEquals("Malformed matrix param: <m1=v1=v2>", e.getMessage());
}
}
@Test
public void testFromUrlWithEmptyPathSegmentWithMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/foo/;m1=v1");
}
@Test
public void testFromUrlWithEmptyPathWithMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/;m1=v1");
}
@Test
public void testFromUrlWithEmptyPathWithMultipleMatrixParams() throws URISyntaxException, CharacterCodingException, MalformedURLException {
assertUrlBuilderRoundtrip("http://foo.com/;m1=v1;m2=v2");
}
@Test
public void testFromUrlWithPathSegmentEndingWithSemicolon() throws URISyntaxException, CharacterCodingException, MalformedURLException {
// a trailing ';' without any matrix param is expected to be dropped
assertUrlBuilderRoundtrip("http://foo.com/foo;", "http://foo.com/foo");
}
@Test
public void testPercentDecodeInvalidPair() throws MalformedURLException, CharacterCodingException {
// "%2o" is not a valid percent escape: 'o' is not a hex digit
try {
UrlBuilder.fromUrl("http://foo.com/fo%2o");
fail();
} catch (IllegalArgumentException e) {
assertEquals("Invalid %-tuple <%2o>", e.getMessage());
}
}
@Test
public void testFromUrlMalformedQueryParamMultiValues() throws MalformedURLException, CharacterCodingException, URISyntaxException {
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1=v2");
}
@Test
public void testFromUrlMalformedQueryParamNoValue() throws MalformedURLException, CharacterCodingException, URISyntaxException {
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1&q2");
}
@Test
public void testFromUrlUnstructuredQueryWithEscapedChars() throws MalformedURLException, CharacterCodingException, URISyntaxException {
assertUrlBuilderRoundtrip("http://foo.com/foo?query==&%23");
}
// ---- queryParam() and unstructuredQuery() exclude each other until clearQuery() is called ----
@Test
public void testCantUseQueryParamAfterQuery() {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q");
try {
ub.queryParam("foo", "bar");
fail();
} catch (IllegalStateException e) {
assertEquals("Cannot call queryParam() when this already has an unstructured query specified",
e.getMessage());
}
}
@Test
public void testCantUseQueryAfterQueryParam() {
UrlBuilder ub = UrlBuilder.forHost("http", "foo.com").queryParam("foo", "bar");
try {
ub.unstructuredQuery("q");
fail();
} catch (IllegalStateException e) {
assertEquals("Cannot call unstructuredQuery() when this already has queryParam pairs specified",
e.getMessage());
}
}
@Test
public void testUnstructuredQueryWithNoSpecialChars() throws Exception {
assertUrlEquals("http://foo.com?q", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q").toUrlString());
}
@Test
public void testUnstructuredQueryWithOkSpecialChars() throws Exception {
assertUrlEquals("http://foo.com?q?/&=", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q?/&=").toUrlString());
}
@Test
public void testUnstructuredQueryWithEscapedSpecialChars() throws Exception {
assertUrlEquals("http://foo.com?q%23%2B", UrlBuilder.forHost("http", "foo.com").unstructuredQuery("q#+").toUrlString());
}
@Test
public void testClearQueryRemovesQueryParam() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "host")
.queryParam("foo", "bar")
.clearQuery();
assertUrlEquals("http://host", ub.toUrlString());
}
@Test
public void testClearQueryRemovesUnstructuredQuery() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "host")
.unstructuredQuery("foobar")
.clearQuery();
assertUrlEquals("http://host", ub.toUrlString());
}
@Test
public void testClearQueryAfterQueryParamAllowsQuery() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "host")
.queryParam("foo", "bar")
.clearQuery()
.unstructuredQuery("foobar");
assertUrlEquals("http://host?foobar", ub.toUrlString());
}
@Test
public void testClearQueryAfterQueryAllowsQueryParam() throws Exception {
UrlBuilder ub = UrlBuilder.forHost("http", "host")
.unstructuredQuery("foobar")
.clearQuery()
.queryParam("foo", "bar");
assertUrlEquals("http://host?foo=bar", ub.toUrlString());
}
/**
 * Round-trip check for a URL that is expected to come back unchanged.
 *
 * @param url the url to parse and to expect back
 */
private void assertUrlBuilderRoundtrip(String url) throws MalformedURLException, CharacterCodingException, URISyntaxException {
assertUrlBuilderRoundtrip(url, url);
}
/**
 * @param origUrl the url that will be used to create a URL
 * @param finalUrl the URL string it should end up as
 */
private void assertUrlBuilderRoundtrip(String origUrl, String finalUrl) throws MalformedURLException, CharacterCodingException, URISyntaxException {
assertUrlEquals(finalUrl, UrlBuilder.fromUrl(new URL(origUrl)).toUrlString());
}
/**
 * Assert that the built string equals the expected one and that it also comes back unchanged after
 * being parsed by {@link URI} and by {@link URL}.
 *
 * @param expected expected URL string
 * @param actual URL string produced by the builder
 */
private static void assertUrlEquals(String expected, String actual) throws URISyntaxException, MalformedURLException {
assertEquals(expected, actual);
assertEquals(expected, new URI(actual).toString());
assertEquals(expected, new URL(actual).toString());
}
}

View file

@ -1,6 +1,6 @@
// xbib content modules, all declared on the 'api' configuration
dependencies {
// string coordinates, version read from the 'xbib-content.version' project property
api "org.xbib:content-core:${project.property('xbib-content.version')}"
api "org.xbib:content-rdf:${project.property('xbib-content.version')}"
api "org.xbib:content-resource:${project.property('xbib-content.version')}"
api "org.xbib:content-xml:${project.property('xbib-content.version')}"
// entries referenced through the 'libs' version catalog accessors
api libs.content.core
api libs.content.rdf
api libs.content.resource
api libs.content.xml
}

View file

@ -1,3 +1,23 @@
// Central version catalog, exposed to the build scripts as 'libs'.
dependencyResolutionManagement {
    versionCatalogs {
        libs {
            // shared version numbers, referenced below via versionRef
            version('gradle', '7.5.1')
            version('junit', '5.9.1')
            version('content', '5.0.1')

            // test libraries
            library('junit-jupiter-api', 'org.junit.jupiter', 'junit-jupiter-api').versionRef('junit')
            library('junit-jupiter-params', 'org.junit.jupiter', 'junit-jupiter-params').versionRef('junit')
            library('junit-jupiter-engine', 'org.junit.jupiter', 'junit-jupiter-engine').versionRef('junit')
            library('hamcrest', 'org.hamcrest', 'hamcrest-library').version('2.2')

            // xbib content modules; each alias is bound to the artifact of the same name
            library('content-core', 'org.xbib', 'content-core').versionRef('content')
            library('content-rdf', 'org.xbib', 'content-rdf').versionRef('content')
            library('content-resource', 'org.xbib', 'content-resource').versionRef('content')
            library('content-xml', 'org.xbib', 'content-xml').versionRef('content')

            library('marc', 'org.xbib', 'marc').version('2.7.0')
            library('charactersets', 'org.xbib', 'bibliographic-character-sets').version('2.0.0')
        }
    }
}

include 'oai-common'
include 'oai-client'
include 'oai-server'