add SRU client
This commit is contained in:
parent
96ba70d4dc
commit
351ea46d19
12 changed files with 1158 additions and 13 deletions
|
@ -30,6 +30,7 @@ dependencyResolutionManagement {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
include 'sru-client-jdk'
|
||||||
include 'z3950-asn1'
|
include 'z3950-asn1'
|
||||||
include 'z3950-api'
|
include 'z3950-api'
|
||||||
include 'z3950-common'
|
include 'z3950-common'
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
package org.xbib.sru.client.jdk;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.http.HttpClient;
|
||||||
|
import java.net.http.HttpRequest;
|
||||||
|
import java.net.http.HttpResponse;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
import org.xbib.sru.client.jdk.util.UrlBuilder;
|
||||||
|
|
||||||
|
public class SRUClient {
|
||||||
|
|
||||||
|
private static final Logger logger = Logger.getLogger(SRUClient.class.getName());
|
||||||
|
|
||||||
|
private final Builder builder;
|
||||||
|
|
||||||
|
private final HttpClient httpClient;
|
||||||
|
|
||||||
|
private SRUClient(Builder builder) {
|
||||||
|
this.builder = builder;
|
||||||
|
this.httpClient = HttpClient.newBuilder()
|
||||||
|
.followRedirects(HttpClient.Redirect.ALWAYS)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Builder builder() {
|
||||||
|
return new Builder();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void searchRetrieve(String query,
|
||||||
|
String recordSchema,
|
||||||
|
Integer startRecord,
|
||||||
|
Integer maximumRecords,
|
||||||
|
Consumer<Reader> consumer) throws IOException, InterruptedException {
|
||||||
|
UrlBuilder url = UrlBuilder.fromUrl(builder.baseURL);
|
||||||
|
url.queryParam(SRUConstants.OPERATION_PARAMETER, "searchRetrieve");
|
||||||
|
url.queryParam(SRUConstants.VERSION_PARAMETER, "1.1");
|
||||||
|
url.queryParam(SRUConstants.RECORD_SCHEMA_PARAMETER, recordSchema);
|
||||||
|
url.queryParam(SRUConstants.START_RECORD_PARAMETER, Integer.toString(startRecord));
|
||||||
|
url.queryParam(SRUConstants.MAXIMUM_RECORDS_PARAMETER, Integer.toString(maximumRecords));
|
||||||
|
url.queryParam(SRUConstants.QUERY_PARAMETER, query);
|
||||||
|
URI uri = URI.create(url.build().toExternalForm());
|
||||||
|
HttpRequest httpRequest = HttpRequest.newBuilder()
|
||||||
|
.uri(uri)
|
||||||
|
.header("accept", "utf-8")
|
||||||
|
.header("user-agent", builder.userAgent != null ? builder.userAgent : "xbib SRU client")
|
||||||
|
.GET()
|
||||||
|
.build();
|
||||||
|
logger.log(Level.INFO, "sending " + httpRequest);
|
||||||
|
HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
|
||||||
|
int status = httpResponse.statusCode();
|
||||||
|
logger.log(Level.FINE, "response status = " + status + " headers = " + httpResponse.headers());
|
||||||
|
String contentType = httpResponse.headers().firstValue("content-type").orElse(null);
|
||||||
|
if (status == 200) {
|
||||||
|
String string = httpResponse.body();
|
||||||
|
if (string != null && string.length() > 0) {
|
||||||
|
consumer.accept(new StringReader(string));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Builder {
|
||||||
|
|
||||||
|
private String baseURL;
|
||||||
|
|
||||||
|
private String userAgent;
|
||||||
|
|
||||||
|
private Builder() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder setBaseURL(String baseURL) {
|
||||||
|
this.baseURL = baseURL;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder setUserAgent(String userAgent) {
|
||||||
|
this.userAgent = userAgent;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SRUClient build() {
|
||||||
|
return new SRUClient(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,17 @@
|
||||||
|
package org.xbib.sru.client.jdk;
|
||||||
|
|
||||||
|
/**
 * Names of the URL query parameters used when building SRU requests.
 */
public interface SRUConstants {

    /** Name of the {@code operation} request parameter. */
    String OPERATION_PARAMETER = "operation";

    /** Name of the {@code version} request parameter. */
    String VERSION_PARAMETER = "version";

    /** Name of the {@code recordSchema} request parameter. */
    String RECORD_SCHEMA_PARAMETER = "recordSchema";

    /** Name of the {@code query} request parameter. */
    String QUERY_PARAMETER = "query";

    /** Name of the {@code startRecord} request parameter. */
    String START_RECORD_PARAMETER = "startRecord";

    /** Name of the {@code maximumRecords} request parameter. */
    String MAXIMUM_RECORDS_PARAMETER = "maximumRecords";

}
|
195
sru-client-jdk/src/main/java/org/xbib/sru/client/jdk/util/PercentDecoder.java
Executable file
195
sru-client-jdk/src/main/java/org/xbib/sru/client/jdk/util/PercentDecoder.java
Executable file
|
@ -0,0 +1,195 @@
|
||||||
|
package org.xbib.sru.client.jdk.util;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
import java.nio.charset.CoderResult;
|
||||||
|
import java.nio.charset.MalformedInputException;
|
||||||
|
import java.nio.charset.UnmappableCharacterException;
|
||||||
|
import static java.nio.charset.CoderResult.OVERFLOW;
|
||||||
|
import static java.nio.charset.CoderResult.UNDERFLOW;
|
||||||
|
|
||||||
|
/**
 * Decodes percent-encoded (%XX) Unicode text.
 *
 * An instance keeps mutable buffers and a {@link CharsetDecoder} between calls, so it must not
 * be shared between threads without external synchronization.
 */
public final class PercentDecoder {

    /**
     * Smallest usable encoded-byte buffer. A zero-capacity buffer could never grow, because
     * growth doubles the current capacity.
     */
    private static final int MIN_ENCODED_BUF_SIZE = 1;

    /**
     * Smallest usable decoded-char buffer: room for one surrogate pair. With less room the
     * decoder reports overflow without consuming input and the decode loop never ends.
     */
    private static final int MIN_DECODED_BUF_SIZE = 2;

    /**
     * Bytes represented by the current run of %-triples. Replaced by a larger buffer as needed.
     */
    private ByteBuffer encodedBuf;

    /**
     * Written to with decoded chars by the decoder.
     */
    private final CharBuffer decodedCharBuf;

    private final CharsetDecoder decoder;

    /**
     * The decoded string for the current input.
     */
    private final StringBuilder outputBuf = new StringBuilder();

    /**
     * Construct a new PercentDecoder with default buffer sizes.
     *
     * @param charsetDecoder Charset to decode bytes into chars with
     * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int)
     */
    public PercentDecoder(CharsetDecoder charsetDecoder) {
        this(charsetDecoder, 16, 16);
    }

    /**
     * @param charsetDecoder            Charset to decode bytes into chars with
     * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes; values
     *                                  below 1 are raised to 1
     * @param decodedCharBufSize        Size of buffer that encoded bytes are decoded into; values
     *                                  below 2 are raised to 2
     * @throws IllegalArgumentException if either size is negative
     */
    public PercentDecoder(CharsetDecoder charsetDecoder, int initialEncodedByteBufSize,
                          int decodedCharBufSize) {
        if (initialEncodedByteBufSize < 0 || decodedCharBufSize < 0) {
            throw new IllegalArgumentException("Buffer sizes must not be negative: "
                    + initialEncodedByteBufSize + ", " + decodedCharBufSize);
        }
        encodedBuf = ByteBuffer.allocate(Math.max(initialEncodedByteBufSize, MIN_ENCODED_BUF_SIZE));
        decodedCharBuf = CharBuffer.allocate(Math.max(decodedCharBufSize, MIN_DECODED_BUF_SIZE));
        decoder = charsetDecoder;
    }

    /**
     * @param input Input with %-encoded representation of characters in this instance's configured
     *              character set, e.g. "%20" for a space character
     * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters
     * @throws IllegalArgumentException     if a '%' is not followed by two hexadecimal digits
     * @throws MalformedInputException      if decoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String decode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        outputBuf.setLength(0);
        // This is almost always an underestimate of the size needed; it is only a starting
        // capacity and the builder grows on demand.
        outputBuf.ensureCapacity(input.length() / 8);
        encodedBuf.clear();

        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c != '%') {
                // a literal char ends the current run of %-triples, so decode that run first
                handleEncodedBytes();
                outputBuf.append(c);
                continue;
            }

            if (i + 2 >= input.length()) {
                throw new IllegalArgumentException(
                        "Could not percent decode <" + input + ">: incomplete %-pair at position " + i);
            }

            growEncodedBufIfFull();

            // note that we advance i here as we consume chars
            int msBits = Character.digit(input.charAt(++i), 16);
            int lsBits = Character.digit(input.charAt(++i), 16);

            if (msBits == -1 || lsBits == -1) {
                throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">");
            }

            // two hex digits yield at most 8 bits, so the cast is safe
            encodedBuf.put((byte) ((msBits << 4) | lsBits));
        }

        handleEncodedBytes();

        return outputBuf.toString();
    }

    /**
     * Replace the encoded-byte buffer with one of twice the capacity when it has no room left,
     * carrying over the bytes collected so far.
     */
    private void growEncodedBufIfFull() {
        if (encodedBuf.remaining() != 0) {
            return;
        }
        ByteBuffer largerBuf = ByteBuffer.allocate(encodedBuf.capacity() * 2);
        encodedBuf.flip();
        largerBuf.put(encodedBuf);
        encodedBuf = largerBuf;
    }

    /**
     * Decode any buffered encoded bytes and write them to the output buf.
     */
    private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException {
        if (encodedBuf.position() == 0) {
            // nothing to do
            return;
        }

        decoder.reset();
        CoderResult coderResult;

        // switch to reading mode
        encodedBuf.flip();

        // keep decoding while the char buffer fills up and encoded bytes remain
        do {
            decodedCharBuf.clear();
            coderResult = decoder.decode(encodedBuf, decodedCharBuf, false);
            throwIfError(coderResult);
            appendDecodedChars();
        } while (coderResult.isOverflow() && encodedBuf.hasRemaining());

        // final decode with end-of-input flag
        decodedCharBuf.clear();
        coderResult = decoder.decode(encodedBuf, decodedCharBuf, true);
        throwIfError(coderResult);

        if (encodedBuf.hasRemaining()) {
            throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes");
        }
        if (coderResult != CoderResult.UNDERFLOW) {
            throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult);
        }

        appendDecodedChars();

        // we've finished the input, wrap it up
        encodedBuf.clear();
        flush();
    }

    /**
     * Must only be called when the input encoded bytes buffer is empty.
     */
    private void flush() throws MalformedInputException, UnmappableCharacterException {
        decodedCharBuf.clear();

        CoderResult coderResult = decoder.flush(decodedCharBuf);
        appendDecodedChars();

        throwIfError(coderResult);

        if (coderResult != CoderResult.UNDERFLOW) {
            throw new IllegalStateException("Decoder flush resulted in " + coderResult);
        }
    }

    /**
     * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding
     * CharacterCodingException.
     *
     * @param coderResult result to check
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException {
        if (coderResult.isMalformed()) {
            throw new MalformedInputException(coderResult.length());
        }
        if (coderResult.isUnmappable()) {
            throw new UnmappableCharacterException(coderResult.length());
        }
    }

    /**
     * Flip the decoded char buf and append it to the string buf.
     */
    private void appendDecodedChars() {
        decodedCharBuf.flip();
        outputBuf.append(decodedCharBuf);
    }
}
|
186
sru-client-jdk/src/main/java/org/xbib/sru/client/jdk/util/PercentEncoder.java
Executable file
186
sru-client-jdk/src/main/java/org/xbib/sru/client/jdk/util/PercentEncoder.java
Executable file
|
@ -0,0 +1,186 @@
|
||||||
|
package org.xbib.sru.client.jdk.util;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.CharsetEncoder;
|
||||||
|
import java.nio.charset.CoderResult;
|
||||||
|
import java.nio.charset.MalformedInputException;
|
||||||
|
import java.nio.charset.UnmappableCharacterException;
|
||||||
|
import java.util.BitSet;
|
||||||
|
import static java.lang.Character.isHighSurrogate;
|
||||||
|
import static java.lang.Character.isLowSurrogate;
|
||||||
|
|
||||||
|
/**
 * Encodes unsafe characters as a sequence of %XX hex-encoded bytes.
 *
 * This is typically done when encoding components of URLs. See {@link UrlPercentEncoders} for pre-configured
 * PercentEncoder instances.
 *
 * An instance reuses internal buffers and a {@link CharsetEncoder} between calls, so it must not
 * be shared between threads without external synchronization.
 */
public final class PercentEncoder {

    private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray();

    private final BitSet safeChars;

    private final CharsetEncoder encoder;

    /**
     * Pre-allocate a string handler to make the common case of encoding to a string faster.
     */
    private final StringBuilderPercentEncoderOutputHandler stringHandler = new StringBuilderPercentEncoderOutputHandler();

    private final ByteBuffer encodedBytes;

    private final CharBuffer unsafeCharsToEncode;

    /**
     * @param safeChars      the set of chars to NOT encode, stored as a bitset with the int positions corresponding to
     *                       those chars set to true. Treated as read only.
     * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder instances
     *                       across threads.
     */
    public PercentEncoder(BitSet safeChars, CharsetEncoder charsetEncoder) {
        this.safeChars = safeChars;
        this.encoder = charsetEncoder;

        // maxBytesPerChar() is a float; truncate and add one so the estimate is never too small
        int maxBytesPerChar = 1 + (int) encoder.maxBytesPerChar();
        // need to handle surrogate pairs, so need to be able to handle 2 chars worth of stuff at once
        encodedBytes = ByteBuffer.allocate(maxBytesPerChar * 2);
        unsafeCharsToEncode = CharBuffer.allocate(2);
    }

    /**
     * Encode the input and pass output chars to a handler.
     *
     * @param input   input string
     * @param handler handler to call on each output character
     * @throws IllegalArgumentException     if the input contains a high surrogate that is not followed by a low
     *                                      surrogate
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public void encode(CharSequence input, StringBuilderPercentEncoderOutputHandler handler) throws
            MalformedInputException, UnmappableCharacterException {

        for (int i = 0; i < input.length(); i++) {

            char c = input.charAt(i);

            if (safeChars.get(c)) {
                handler.onOutputChar(c);
                continue;
            }

            // not a safe char
            unsafeCharsToEncode.clear();
            unsafeCharsToEncode.append(c);
            if (isHighSurrogate(c)) {
                if (input.length() > i + 1) {
                    // get the low surrogate as well
                    char lowSurrogate = input.charAt(i + 1);
                    if (isLowSurrogate(lowSurrogate)) {
                        unsafeCharsToEncode.append(lowSurrogate);
                        i++;
                    } else {
                        throw new IllegalArgumentException(
                                "Invalid UTF-16: Char " + (i) + " is a high surrogate (\\u" + Integer
                                        .toHexString(c) + "), but char " + (i + 1) + " is not a low surrogate (\\u" + Integer
                                        .toHexString(lowSurrogate) + ")");
                    }
                } else {
                    throw new IllegalArgumentException(
                            "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer
                                    .toHexString(c) + ")");
                }
            }

            flushUnsafeCharBuffer(handler);
        }
    }

    /**
     * Encode the input and return the resulting text as a String.
     *
     * @param input input string
     * @return the input string with every character that's not in safeChars turned into its byte representation via the
     * instance's encoder and then percent-encoded
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String encode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        stringHandler.reset();
        stringHandler.ensureCapacity(input.length());
        encode(input, stringHandler);
        return stringHandler.getContents();
    }

    /**
     * Encode unsafeCharsToEncode to bytes as per charsetEncoder, then percent-encode those bytes into output.
     *
     * Side effects: unsafeCharsToEncode will be read from and cleared. encodedBytes will be cleared and written to.
     */
    private void flushUnsafeCharBuffer(StringBuilderPercentEncoderOutputHandler handler) throws MalformedInputException,
            UnmappableCharacterException {
        // need to read from the char buffer, which was most recently written to
        unsafeCharsToEncode.flip();

        encodedBytes.clear();

        encoder.reset();
        CoderResult result = encoder.encode(unsafeCharsToEncode, encodedBytes, true);
        checkResult(result);
        result = encoder.flush(encodedBytes);
        checkResult(result);

        // read contents of bytebuffer
        encodedBytes.flip();

        while (encodedBytes.hasRemaining()) {
            byte b = encodedBytes.get();
            handler.onOutputChar('%');
            // the mask removes the sign extension introduced by shifting a negative byte
            handler.onOutputChar(HEX_CODE[(b >> 4) & 0xF]);
            handler.onOutputChar(HEX_CODE[b & 0xF]);
        }
    }

    /**
     * @param result result to check
     * @throws IllegalStateException        if result is overflow
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private static void checkResult(CoderResult result) throws MalformedInputException, UnmappableCharacterException {
        if (result.isOverflow()) {
            throw new IllegalStateException("Byte buffer overflow; this should not happen.");
        }
        if (result.isMalformed()) {
            throw new MalformedInputException(result.length());
        }
        if (result.isUnmappable()) {
            throw new UnmappableCharacterException(result.length());
        }
    }

    /**
     * Collects encoder output in a reusable {@link StringBuilder}. Declared static because it
     * uses nothing from the enclosing encoder instance.
     */
    private static final class StringBuilderPercentEncoderOutputHandler {

        private final StringBuilder stringBuilder;

        StringBuilderPercentEncoderOutputHandler() {
            stringBuilder = new StringBuilder();
        }

        /** @return everything appended since the last {@link #reset()} */
        String getContents() {
            return stringBuilder.toString();
        }

        /** Discard the collected output while keeping the allocated capacity. */
        void reset() {
            stringBuilder.setLength(0);
        }

        void ensureCapacity(int length) {
            stringBuilder.ensureCapacity(length);
        }

        void onOutputChar(char c) {
            stringBuilder.append(c);
        }
    }
}
|
502
sru-client-jdk/src/main/java/org/xbib/sru/client/jdk/util/UrlBuilder.java
Executable file
502
sru-client-jdk/src/main/java/org/xbib/sru/client/jdk/util/UrlBuilder.java
Executable file
|
@ -0,0 +1,502 @@
|
||||||
|
package org.xbib.sru.client.jdk.util;
|
||||||
|
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.charset.CharacterCodingException;
|
||||||
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getFragmentEncoder;
|
||||||
|
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getMatrixEncoder;
|
||||||
|
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getPathEncoder;
|
||||||
|
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getQueryParamEncoder;
|
||||||
|
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getRegNameEncoder;
|
||||||
|
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getUnstructuredQueryEncoder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builder for urls with url-encoding applied to path, query param, etc.
|
||||||
|
*
|
||||||
|
* Escaping rules are from RFC 3986, RFC 1738 and the <a href ="http://www.w3.org/TR/html401/interact/forms.html#form-content-type">HTML 4 spec</a>
|
||||||
|
* This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make
|
||||||
|
* HTTP-useful URLs.
|
||||||
|
*/
|
||||||
|
public final class UrlBuilder {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IPv6 address, cribbed from <a href="http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings">StackOverflow</a>
|
||||||
|
*/
|
||||||
|
private static final Pattern IPV6_PATTERN = Pattern
|
||||||
|
.compile(
|
||||||
|
"\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IPv4 dotted quad
|
||||||
|
*/
|
||||||
|
private static final Pattern IPV4_PATTERN = Pattern
|
||||||
|
.compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z");
|
||||||
|
|
||||||
|
private final String scheme;
|
||||||
|
|
||||||
|
private final String host;
|
||||||
|
|
||||||
|
private final Integer port;
|
||||||
|
|
||||||
|
private final List<Pair<String, String>> queryParams = new ArrayList<>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this is non-null, queryParams must be empty, and vice versa.
|
||||||
|
*/
|
||||||
|
private String unstructuredQuery;
|
||||||
|
|
||||||
|
private final List<PathSegment> pathSegments = new ArrayList<>();
|
||||||
|
|
||||||
|
private final PercentEncoder pathEncoder = getPathEncoder();
|
||||||
|
private final PercentEncoder regNameEncoder = getRegNameEncoder();
|
||||||
|
private final PercentEncoder matrixEncoder = getMatrixEncoder();
|
||||||
|
private final PercentEncoder queryParamEncoder = getQueryParamEncoder();
|
||||||
|
private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder();
|
||||||
|
private final PercentEncoder fragmentEncoder = getFragmentEncoder();
|
||||||
|
|
||||||
|
private String fragment;
|
||||||
|
|
||||||
|
private boolean forceTrailingSlash = false;
|
||||||
|
|
||||||
|
/**
 * Create a URL with UTF-8 encoding.
 *
 * @param scheme scheme (e.g. http)
 * @param host   host (e.g. foo.com or 1.2.3.4 or [::1])
 * @param port   null or a positive integer
 */
private UrlBuilder(String scheme, String host, Integer port) {
    this.scheme = scheme;
    this.host = host;
    this.port = port;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a URL with an null port and UTF-8 encoding.
|
||||||
|
*
|
||||||
|
* @param scheme scheme (e.g. http)
|
||||||
|
* @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
|
||||||
|
* ([::1]), excluding IPvFuture since no one uses that in practice
|
||||||
|
* @return a url builder
|
||||||
|
* @see UrlBuilder#forHost(String scheme, String host, int port)
|
||||||
|
*/
|
||||||
|
public static UrlBuilder forHost(String scheme, String host) {
|
||||||
|
return new UrlBuilder(scheme, host, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param scheme scheme (e.g. http)
|
||||||
|
* @param host host in any of the valid syntaxes: reg-name ( a dns name), ipv4 literal (1.2.3.4), ipv6 literal
|
||||||
|
* ([::1]), excluding IPvFuture since no one uses that in practice
|
||||||
|
* @param port port
|
||||||
|
* @return a url builder
|
||||||
|
*/
|
||||||
|
public static UrlBuilder forHost(String scheme, String host, int port) {
|
||||||
|
return new UrlBuilder(scheme, host, port);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static UrlBuilder fromUrl(String urlSpec) throws CharacterCodingException, MalformedURLException {
|
||||||
|
return fromUrl(new URL(urlSpec));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the
|
||||||
|
* query string apply.
|
||||||
|
*
|
||||||
|
* @param url url to initialize builder with
|
||||||
|
* @return a UrlBuilder containing the host, path, etc. from the url
|
||||||
|
* @throws CharacterCodingException if char decoding fails
|
||||||
|
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
|
||||||
|
*/
|
||||||
|
public static UrlBuilder fromUrl(URL url) throws CharacterCodingException {
|
||||||
|
return fromUrl(url, StandardCharsets.UTF_8.newDecoder());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a UrlBuilder initialized with the contents of a {@link URL}.
|
||||||
|
*
|
||||||
|
* The query string will be parsed into HTML4 query params if it can be separated into a
|
||||||
|
* <code>&</code>-separated sequence of <code>key=value</code> pairs. The sequence of query params can then be
|
||||||
|
* appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is
|
||||||
|
* only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that
|
||||||
|
* is in some other form. To represent this case, if the aforementioned param-parsing attempt fails, the query
|
||||||
|
* string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link
|
||||||
|
* UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls
|
||||||
|
* to {@link UrlBuilder#unstructuredQuery(String)}}, which replaces the entire query string, are allowed.
|
||||||
|
*
|
||||||
|
* @param url url to initialize builder with
|
||||||
|
* @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8)
|
||||||
|
* @return a UrlBuilder containing the host, path, etc. from the url
|
||||||
|
* @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to
|
||||||
|
* report errors
|
||||||
|
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
|
||||||
|
*/
|
||||||
|
public static UrlBuilder fromUrl(URL url, CharsetDecoder charsetDecoder) throws
|
||||||
|
CharacterCodingException {
|
||||||
|
|
||||||
|
PercentDecoder decoder = new PercentDecoder(charsetDecoder);
|
||||||
|
// reg names must be encoded UTF-8
|
||||||
|
PercentDecoder regNameDecoder;
|
||||||
|
if (charsetDecoder.charset().equals(StandardCharsets.UTF_8)) {
|
||||||
|
regNameDecoder = decoder;
|
||||||
|
} else {
|
||||||
|
regNameDecoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder());
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer port = url.getPort();
|
||||||
|
if (port == -1) {
|
||||||
|
port = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port);
|
||||||
|
|
||||||
|
buildFromPath(builder, decoder, url);
|
||||||
|
|
||||||
|
buildFromQuery(builder, decoder, url);
|
||||||
|
|
||||||
|
if (url.getRef() != null) {
|
||||||
|
builder.fragment(decoder.decode(url.getRef()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a path segment.
|
||||||
|
*
|
||||||
|
* @param segment a path segment
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder pathSegment(String segment) {
|
||||||
|
pathSegments.add(new PathSegment(segment));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}.
|
||||||
|
*
|
||||||
|
* @param segments path segments
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder pathSegments(String... segments) {
|
||||||
|
for (String segment : segments) {
|
||||||
|
pathSegment(segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add an HTML query parameter. Query parameters will be encoded in the order added.
|
||||||
|
*
|
||||||
|
* Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by
|
||||||
|
* <a href="http://www.w3.org/TR/html401/interact/forms.html#form-content-type">HTML 4</a>.
|
||||||
|
*
|
||||||
|
* If you use this method to build a query string, or created this builder from a url with a query string that can
|
||||||
|
* successfully be parsed into query param pairs, you cannot subsequently use {@link
|
||||||
|
* UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}.
|
||||||
|
*
|
||||||
|
* @param name param name
|
||||||
|
* @param value param value
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder queryParam(String name, String value) {
|
||||||
|
if (unstructuredQuery != null) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Cannot call queryParam() when this already has an unstructured query specified");
|
||||||
|
}
|
||||||
|
|
||||||
|
queryParams.add(Pair.of(name, value));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that
|
||||||
|
* is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite
|
||||||
|
* that query.
|
||||||
|
*
|
||||||
|
* If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you
|
||||||
|
* cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL,
|
||||||
|
* CharsetDecoder)}.
|
||||||
|
*
|
||||||
|
* @param query Complete URI query, as specified by <a href="https://tools.ietf.org/html/rfc3986#section-3.4">RFC 3986</a>.
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder unstructuredQuery(String query) {
|
||||||
|
if (!queryParams.isEmpty()) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Cannot call unstructuredQuery() when this already has queryParam pairs specified");
|
||||||
|
}
|
||||||
|
|
||||||
|
unstructuredQuery = query;
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear the unstructured query and any query params.
|
||||||
|
*
|
||||||
|
* Since the query / query param situation is a little complicated, this method will let you remove all query
|
||||||
|
* information and start again from scratch. This may be useful when taking an existing url, parsing it into a
|
||||||
|
* builder, and then re-doing its query params, for instance.
|
||||||
|
*
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder clearQuery() {
|
||||||
|
queryParams.clear();
|
||||||
|
unstructuredQuery = null;
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the
|
||||||
|
* root. Matrix params will be encoded in the order added.
|
||||||
|
*
|
||||||
|
* @param name param name
|
||||||
|
* @param value param value
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder matrixParam(String name, String value) {
|
||||||
|
if (pathSegments.isEmpty()) {
|
||||||
|
// create an empty path segment to represent a matrix param applied to the root
|
||||||
|
pathSegment("");
|
||||||
|
}
|
||||||
|
|
||||||
|
PathSegment seg = pathSegments.get(pathSegments.size() - 1);
|
||||||
|
seg.matrixParams.add(Pair.of(name, value));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the fragment.
|
||||||
|
*
|
||||||
|
* @param fragment fragment string
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder fragment(String fragment) {
|
||||||
|
this.fragment = fragment;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Force the generated URL to have a trailing slash at the end of the path.
|
||||||
|
*
|
||||||
|
* @return this
|
||||||
|
*/
|
||||||
|
public UrlBuilder forceTrailingSlash() {
|
||||||
|
forceTrailingSlash = true;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public URL build() throws CharacterCodingException, MalformedURLException {
|
||||||
|
return new URL(toUrlString());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode the current builder state into a URL string.
|
||||||
|
*
|
||||||
|
* @return a well-formed URL string
|
||||||
|
* @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
|
||||||
|
*/
|
||||||
|
public String toUrlString() throws CharacterCodingException {
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
|
||||||
|
buf.append(scheme);
|
||||||
|
buf.append("://");
|
||||||
|
|
||||||
|
buf.append(encodeHost(host));
|
||||||
|
if (port != null) {
|
||||||
|
buf.append(':');
|
||||||
|
buf.append(port);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (PathSegment pathSegment : pathSegments) {
|
||||||
|
buf.append('/');
|
||||||
|
buf.append(pathEncoder.encode(pathSegment.segment));
|
||||||
|
|
||||||
|
for (Pair<String, String> matrixParam : pathSegment.matrixParams) {
|
||||||
|
buf.append(';');
|
||||||
|
buf.append(matrixEncoder.encode(matrixParam.getKey()));
|
||||||
|
buf.append('=');
|
||||||
|
buf.append(matrixEncoder.encode(matrixParam.getValue()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (forceTrailingSlash) {
|
||||||
|
buf.append('/');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!queryParams.isEmpty()) {
|
||||||
|
buf.append("?");
|
||||||
|
Iterator<Pair<String, String>> qpIter = queryParams.iterator();
|
||||||
|
while (qpIter.hasNext()) {
|
||||||
|
Pair<String, String> queryParam = qpIter.next();
|
||||||
|
buf.append(queryParamEncoder.encode(queryParam.getKey()));
|
||||||
|
buf.append('=');
|
||||||
|
buf.append(queryParamEncoder.encode(queryParam.getValue()));
|
||||||
|
if (qpIter.hasNext()) {
|
||||||
|
buf.append('&');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (unstructuredQuery != null) {
|
||||||
|
buf.append("?");
|
||||||
|
buf.append(unstructuredQueryEncoder.encode(unstructuredQuery));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fragment != null) {
|
||||||
|
buf.append('#');
|
||||||
|
buf.append(fragmentEncoder.encode(fragment));
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populate a url builder based on the query of a url
|
||||||
|
*
|
||||||
|
* @param builder builder
|
||||||
|
* @param decoder decoder
|
||||||
|
* @param url url
|
||||||
|
* @throws CharacterCodingException
|
||||||
|
*/
|
||||||
|
private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws
|
||||||
|
CharacterCodingException {
|
||||||
|
if (url.getQuery() != null) {
|
||||||
|
String q = url.getQuery();
|
||||||
|
|
||||||
|
// try to parse into &-separated key=value pairs
|
||||||
|
List<Pair<String, String>> pairs = new ArrayList<>();
|
||||||
|
boolean parseOk = true;
|
||||||
|
|
||||||
|
for (String queryChunk : q.split("&")) {
|
||||||
|
String[] queryParamChunks = queryChunk.split("=");
|
||||||
|
|
||||||
|
if (queryParamChunks.length != 2) {
|
||||||
|
parseOk = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pairs.add(Pair.of(decoder.decode(queryParamChunks[0]),
|
||||||
|
decoder.decode(queryParamChunks[1])));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parseOk) {
|
||||||
|
for (Pair<String, String> pair : pairs) {
|
||||||
|
builder.queryParam(pair.getKey(), pair.getValue());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
builder.unstructuredQuery(decoder.decode(q));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populate the path segments of a url builder from a url
|
||||||
|
*
|
||||||
|
* @param builder builder
|
||||||
|
* @param decoder decoder
|
||||||
|
* @param url url
|
||||||
|
* @throws CharacterCodingException
|
||||||
|
*/
|
||||||
|
private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws
|
||||||
|
CharacterCodingException {
|
||||||
|
for (String pathChunk : url.getPath().split("/")) {
|
||||||
|
if (pathChunk.equals("")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pathChunk.charAt(0) == ';') {
|
||||||
|
builder.pathSegment("");
|
||||||
|
// empty path segment, but matrix params
|
||||||
|
for (String matrixChunk : pathChunk.substring(1).split(";")) {
|
||||||
|
buildFromMatrixParamChunk(decoder, builder, matrixChunk);
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise, path chunk is non empty and does not start with a ';'
|
||||||
|
|
||||||
|
String[] matrixChunks = pathChunk.split(";");
|
||||||
|
|
||||||
|
// first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will
|
||||||
|
// not be included in the final url.
|
||||||
|
builder.pathSegment(decoder.decode(matrixChunks[0]));
|
||||||
|
|
||||||
|
// if there any other chunks, they're matrix param pairs
|
||||||
|
for (int i = 1; i < matrixChunks.length; i++) {
|
||||||
|
buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws
|
||||||
|
CharacterCodingException {
|
||||||
|
String[] mtxPair = pathMatrixChunk.split("=");
|
||||||
|
if (mtxPair.length != 2) {
|
||||||
|
throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">");
|
||||||
|
}
|
||||||
|
|
||||||
|
String mtxName = mtxPair[0];
|
||||||
|
String mtxVal = mtxPair[1];
|
||||||
|
ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param host original host string
|
||||||
|
* @return host encoded as in RFC 3986 section 3.2.2
|
||||||
|
*/
|
||||||
|
private String encodeHost(String host) throws CharacterCodingException {
|
||||||
|
// matching order: IP-literal, IPv4, reg-name
|
||||||
|
if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) {
|
||||||
|
return host;
|
||||||
|
}
|
||||||
|
|
||||||
|
// it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL)
|
||||||
|
return regNameEncoder.encode(host);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bundle of a path segment name and any associated matrix params.
|
||||||
|
*/
|
||||||
|
private static class PathSegment {
|
||||||
|
private final String segment;
|
||||||
|
private final List<Pair<String, String>> matrixParams = new ArrayList<>();
|
||||||
|
|
||||||
|
PathSegment(String segment) {
|
||||||
|
this.segment = segment;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class Pair<K,V> {
|
||||||
|
|
||||||
|
K key;
|
||||||
|
|
||||||
|
V value;
|
||||||
|
|
||||||
|
Pair(K key, V value) {
|
||||||
|
this.key = key;
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
static <K,V> Pair<K,V> of(K key, V value) {
|
||||||
|
return new Pair<>(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
K getKey() {
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
|
V getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,164 @@
|
||||||
|
package org.xbib.sru.client.jdk.util;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.BitSet;
|
||||||
|
import static java.nio.charset.CodingErrorAction.REPLACE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See RFC 3986, RFC 1738 and <a href="http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding">...</a>.
|
||||||
|
*/
|
||||||
|
public final class UrlPercentEncoders {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* an encoder for RFC 3986 reg-names
|
||||||
|
*/
|
||||||
|
|
||||||
|
private static final BitSet REG_NAME_BIT_SET = new BitSet();
|
||||||
|
|
||||||
|
private static final BitSet PATH_BIT_SET = new BitSet();
|
||||||
|
private static final BitSet MATRIX_BIT_SET = new BitSet();
|
||||||
|
private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet();
|
||||||
|
private static final BitSet QUERY_PARAM_BIT_SET = new BitSet();
|
||||||
|
private static final BitSet FRAGMENT_BIT_SET = new BitSet();
|
||||||
|
|
||||||
|
static {
|
||||||
|
// RFC 3986 'reg-name'. This is not very aggressive... it's quite possible to have DNS-illegal names out of this.
|
||||||
|
// Regardless, it will at least be URI-compliant even if it's not HTTP URL-compliant.
|
||||||
|
addUnreserved(REG_NAME_BIT_SET);
|
||||||
|
addSubdelims(REG_NAME_BIT_SET);
|
||||||
|
|
||||||
|
// Represents RFC 3986 'pchar'. Remove delimiter that starts matrix section.
|
||||||
|
addPChar(PATH_BIT_SET);
|
||||||
|
PATH_BIT_SET.clear((int) ';');
|
||||||
|
|
||||||
|
// Remove delims for HTTP matrix params as per RFC 1738 S3.3. The other reserved chars ('/' and '?') are already excluded.
|
||||||
|
addPChar(MATRIX_BIT_SET);
|
||||||
|
MATRIX_BIT_SET.clear((int) ';');
|
||||||
|
MATRIX_BIT_SET.clear((int) '=');
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At this point it represents RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also
|
||||||
|
* specifies that "+" can mean space in a query, so we will make sure to say that '+' is not safe to leave as-is
|
||||||
|
*/
|
||||||
|
addQuery(UNSTRUCTURED_QUERY_BIT_SET);
|
||||||
|
UNSTRUCTURED_QUERY_BIT_SET.clear((int) '+');
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create more stringent requirements for HTML4 queries: remove delimiters for HTML query params so that key=value
|
||||||
|
* pairs can be used.
|
||||||
|
*/
|
||||||
|
QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET);
|
||||||
|
QUERY_PARAM_BIT_SET.clear((int) '=');
|
||||||
|
QUERY_PARAM_BIT_SET.clear((int) '&');
|
||||||
|
|
||||||
|
addFragment(FRAGMENT_BIT_SET);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PercentEncoder getRegNameEncoder() {
|
||||||
|
return new PercentEncoder(REG_NAME_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||||
|
.onUnmappableCharacter(REPLACE));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PercentEncoder getPathEncoder() {
|
||||||
|
return new PercentEncoder(PATH_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||||
|
.onUnmappableCharacter(REPLACE));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PercentEncoder getMatrixEncoder() {
|
||||||
|
return new PercentEncoder(MATRIX_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||||
|
.onUnmappableCharacter(REPLACE));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PercentEncoder getUnstructuredQueryEncoder() {
|
||||||
|
return new PercentEncoder(UNSTRUCTURED_QUERY_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||||
|
.onUnmappableCharacter(REPLACE));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PercentEncoder getQueryParamEncoder() {
|
||||||
|
return new PercentEncoder(QUERY_PARAM_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||||
|
.onUnmappableCharacter(REPLACE));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static PercentEncoder getFragmentEncoder() {
|
||||||
|
return new PercentEncoder(FRAGMENT_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||||
|
.onUnmappableCharacter(REPLACE));
|
||||||
|
}
|
||||||
|
|
||||||
|
private UrlPercentEncoders() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add code points for 'fragment' chars
|
||||||
|
*
|
||||||
|
* @param fragmentBitSet bit set
|
||||||
|
*/
|
||||||
|
private static void addFragment(BitSet fragmentBitSet) {
|
||||||
|
addPChar(fragmentBitSet);
|
||||||
|
fragmentBitSet.set((int) '/');
|
||||||
|
fragmentBitSet.set((int) '?');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add code points for 'query' chars
|
||||||
|
*
|
||||||
|
* @param queryBitSet bit set
|
||||||
|
*/
|
||||||
|
private static void addQuery(BitSet queryBitSet) {
|
||||||
|
addPChar(queryBitSet);
|
||||||
|
queryBitSet.set((int) '/');
|
||||||
|
queryBitSet.set((int) '?');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add code points for 'pchar' chars.
|
||||||
|
*
|
||||||
|
* @param bs bitset
|
||||||
|
*/
|
||||||
|
private static void addPChar(BitSet bs) {
|
||||||
|
addUnreserved(bs);
|
||||||
|
addSubdelims(bs);
|
||||||
|
bs.set((int) ':');
|
||||||
|
bs.set((int) '@');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add codepoints for 'unreserved' chars
|
||||||
|
*
|
||||||
|
* @param bs bitset to add codepoints to
|
||||||
|
*/
|
||||||
|
private static void addUnreserved(BitSet bs) {
|
||||||
|
|
||||||
|
for (int i = 'a'; i <= 'z'; i++) {
|
||||||
|
bs.set(i);
|
||||||
|
}
|
||||||
|
for (int i = 'A'; i <= 'Z'; i++) {
|
||||||
|
bs.set(i);
|
||||||
|
}
|
||||||
|
for (int i = '0'; i <= '9'; i++) {
|
||||||
|
bs.set(i);
|
||||||
|
}
|
||||||
|
bs.set((int) '-');
|
||||||
|
bs.set((int) '.');
|
||||||
|
bs.set((int) '_');
|
||||||
|
bs.set((int) '~');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add codepoints for 'sub-delims' chars
|
||||||
|
*
|
||||||
|
* @param bs bitset to add codepoints to
|
||||||
|
*/
|
||||||
|
private static void addSubdelims(BitSet bs) {
|
||||||
|
bs.set((int) '!');
|
||||||
|
bs.set((int) '$');
|
||||||
|
bs.set((int) '&');
|
||||||
|
bs.set((int) '\'');
|
||||||
|
bs.set((int) '(');
|
||||||
|
bs.set((int) ')');
|
||||||
|
bs.set((int) '*');
|
||||||
|
bs.set((int) '+');
|
||||||
|
bs.set((int) ',');
|
||||||
|
bs.set((int) ';');
|
||||||
|
bs.set((int) '=');
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,8 +1,5 @@
|
||||||
package org.xbib.z3950.api;
|
package org.xbib.z3950.api;
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
public interface InitListener {
|
public interface InitListener {
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
package org.xbib.z3950.api;
|
package org.xbib.z3950.api;
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
public interface RecordListener {
|
public interface RecordListener {
|
||||||
|
|
||||||
|
|
|
@ -2,9 +2,6 @@ package org.xbib.z3950.api;
|
||||||
|
|
||||||
import org.xbib.asn1.BEREncoding;
|
import org.xbib.asn1.BEREncoding;
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
public interface ScanListener {
|
public interface ScanListener {
|
||||||
|
|
||||||
|
|
|
@ -2,9 +2,6 @@ package org.xbib.z3950.api;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
public interface SearchListener {
|
public interface SearchListener {
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,7 @@ class LVITest {
|
||||||
//String query = "@attr 1=12 \"(DE-101)1016677359\"" // record ID plus prefix OK!
|
//String query = "@attr 1=12 \"(DE-101)1016677359\"" // record ID plus prefix OK!
|
||||||
//String query = "@attr 1=12 \"(DE-600)2635378-7\"" // ZDB ID plus prefix NOT OK!
|
//String query = "@attr 1=12 \"(DE-600)2635378-7\"" // ZDB ID plus prefix NOT OK!
|
||||||
//String query = "@attr 1=1016 \"2020\"" // any OK!
|
//String query = "@attr 1=1016 \"2020\"" // any OK!
|
||||||
//String query = "@attr 1=1052 12-7" // ZDB-ID OK!
|
String query = "@attr 1=1052 12-7" // ZDB-ID OK!
|
||||||
String preferredRecordSyntax = "marc21"
|
String preferredRecordSyntax = "marc21"
|
||||||
int from = 1
|
int from = 1
|
||||||
int size = 1
|
int size = 1
|
||||||
|
|
Loading…
Reference in a new issue