add SRU client

This commit is contained in:
Jörg Prante 2023-01-22 23:07:51 +01:00
parent 96ba70d4dc
commit 351ea46d19
12 changed files with 1158 additions and 13 deletions

View file

@ -30,6 +30,7 @@ dependencyResolutionManagement {
}
}
include 'sru-client-jdk'
include 'z3950-asn1'
include 'z3950-api'
include 'z3950-common'

View file

@ -0,0 +1,92 @@
package org.xbib.sru.client.jdk;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.xbib.sru.client.jdk.util.UrlBuilder;
/**
 * Minimal SRU client that sends {@code searchRetrieve} requests (SRU version 1.1) as HTTP GET
 * using the JDK {@link HttpClient}.
 *
 * Instances are created through {@link #builder()}. Redirects are always followed.
 */
public class SRUClient {

    private static final Logger logger = Logger.getLogger(SRUClient.class.getName());

    private final Builder builder;

    private final HttpClient httpClient;

    private SRUClient(Builder builder) {
        this.builder = builder;
        this.httpClient = HttpClient.newBuilder()
                .followRedirects(HttpClient.Redirect.ALWAYS)
                .build();
    }

    /**
     * @return a new, empty builder for an {@link SRUClient}
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Execute an SRU {@code searchRetrieve} operation against the configured base URL.
     *
     * The response body is handed to the consumer only when the HTTP status is 200 and the body is
     * not empty; any other status is logged and otherwise ignored.
     *
     * @param query          the query string, sent as the {@code query} parameter
     * @param recordSchema   the record schema, or null to omit the {@code recordSchema} parameter
     * @param startRecord    the start record, or null to omit the {@code startRecord} parameter
     * @param maximumRecords the maximum number of records, or null to omit the {@code maximumRecords} parameter
     * @param consumer       receives a reader over the response body
     * @throws IOException          if the base URL is malformed, URL encoding fails or the HTTP exchange fails
     * @throws InterruptedException if the HTTP exchange is interrupted
     */
    public void searchRetrieve(String query,
                               String recordSchema,
                               Integer startRecord,
                               Integer maximumRecords,
                               Consumer<Reader> consumer) throws IOException, InterruptedException {
        UrlBuilder url = UrlBuilder.fromUrl(builder.baseURL);
        url.queryParam(SRUConstants.OPERATION_PARAMETER, "searchRetrieve");
        url.queryParam(SRUConstants.VERSION_PARAMETER, "1.1");
        // The boxed/nullable parameters are optional: only send them when present. Passing null
        // used to fail with a NullPointerException (unboxing, or inside the percent encoder).
        if (recordSchema != null) {
            url.queryParam(SRUConstants.RECORD_SCHEMA_PARAMETER, recordSchema);
        }
        if (startRecord != null) {
            url.queryParam(SRUConstants.START_RECORD_PARAMETER, startRecord.toString());
        }
        if (maximumRecords != null) {
            url.queryParam(SRUConstants.MAXIMUM_RECORDS_PARAMETER, maximumRecords.toString());
        }
        url.queryParam(SRUConstants.QUERY_PARAMETER, query);
        URI uri = URI.create(url.build().toExternalForm());
        HttpRequest httpRequest = HttpRequest.newBuilder()
                .uri(uri)
                // "utf-8" is a charset, not a media type, so it belongs in accept-charset rather than accept
                .header("accept-charset", "utf-8")
                .header("user-agent", builder.userAgent != null ? builder.userAgent : "xbib SRU client")
                .GET()
                .build();
        logger.log(Level.INFO, "sending " + httpRequest);
        HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
        int status = httpResponse.statusCode();
        logger.log(Level.FINE, "response status = " + status + " headers = " + httpResponse.headers());
        if (status != 200) {
            // best effort: the consumer is simply not called, but leave a trace of why
            logger.log(Level.WARNING, "unexpected response status " + status + " for " + uri);
            return;
        }
        String body = httpResponse.body();
        if (body != null && !body.isEmpty()) {
            consumer.accept(new StringReader(body));
        }
    }

    /**
     * Currently a no-op.
     */
    public void close() {
    }

    /**
     * Collects the settings for an {@link SRUClient}.
     */
    public static class Builder {

        private String baseURL;

        private String userAgent;

        private Builder() {
        }

        /**
         * @param baseURL the URL of the SRU endpoint; request parameters are appended to its query
         * @return this builder
         */
        public Builder setBaseURL(String baseURL) {
            this.baseURL = baseURL;
            return this;
        }

        /**
         * @param userAgent value of the user-agent header; when null, "xbib SRU client" is sent
         * @return this builder
         */
        public Builder setUserAgent(String userAgent) {
            this.userAgent = userAgent;
            return this;
        }

        /**
         * @return a new client using the current settings of this builder
         */
        public SRUClient build() {
            return new SRUClient(this);
        }
    }
}

View file

@ -0,0 +1,17 @@
package org.xbib.sru.client.jdk;
/**
 * Names of the URL query parameters that make up an SRU request.
 */
public interface SRUConstants {

    // --- request identification ---

    /** Name of the parameter carrying the SRU operation. */
    String OPERATION_PARAMETER = "operation";

    /** Name of the parameter carrying the SRU protocol version. */
    String VERSION_PARAMETER = "version";

    // --- search ---

    /** Name of the parameter carrying the query. */
    String QUERY_PARAMETER = "query";

    /** Name of the parameter carrying the requested record schema. */
    String RECORD_SCHEMA_PARAMETER = "recordSchema";

    // --- paging ---

    /** Name of the parameter carrying the position of the first record to return. */
    String START_RECORD_PARAMETER = "startRecord";

    /** Name of the parameter carrying the maximum number of records to return. */
    String MAXIMUM_RECORDS_PARAMETER = "maximumRecords";
}

View file

@ -0,0 +1,195 @@
package org.xbib.sru.client.jdk.util;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import static java.nio.charset.CoderResult.OVERFLOW;
import static java.nio.charset.CoderResult.UNDERFLOW;
/**
 * Decodes percent-encoded (%XX) Unicode text.
 *
 * An instance reuses its internal buffers between calls to {@link #decode(CharSequence)}, so it
 * must not be shared between threads without external synchronization.
 */
public final class PercentDecoder {

    /**
     * Smallest usable size of the decoded char buffer. A decoder emits a supplementary code point as
     * an indivisible surrogate pair, so with fewer than two chars of room it reports overflow
     * forever without consuming any input.
     */
    private static final int MIN_DECODED_CHAR_BUF_SIZE = 2;

    /**
     * bytes represented by the current sequence of %-triples. Resized as needed.
     */
    private ByteBuffer encodedBuf;

    /**
     * Written to with decoded chars by decoder
     */
    private final CharBuffer decodedCharBuf;

    private final CharsetDecoder decoder;

    /**
     * The decoded string for the current input
     */
    private final StringBuilder outputBuf = new StringBuilder();

    /**
     * Construct a new PercentDecoder with default buffer sizes.
     *
     * @param charsetDecoder Charset to decode bytes into chars with
     * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int)
     */
    public PercentDecoder(CharsetDecoder charsetDecoder) {
        this(charsetDecoder, 16, 16);
    }

    /**
     * @param charsetDecoder            Charset to decode bytes into chars with
     * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes; values below 1 are
     *                                  raised to 1 so that the buffer can grow by doubling
     * @param decodedCharBufSize        Size of buffer that encoded bytes are decoded into; values below 2 are
     *                                  raised to 2 so that a surrogate pair always fits
     * @throws IllegalArgumentException if either size is negative
     */
    public PercentDecoder(CharsetDecoder charsetDecoder, int initialEncodedByteBufSize,
                          int decodedCharBufSize) {
        if (initialEncodedByteBufSize < 0 || decodedCharBufSize < 0) {
            throw new IllegalArgumentException("Buffer sizes must not be negative: "
                    + initialEncodedByteBufSize + ", " + decodedCharBufSize);
        }
        encodedBuf = ByteBuffer.allocate(Math.max(1, initialEncodedByteBufSize));
        decodedCharBuf = CharBuffer.allocate(Math.max(MIN_DECODED_CHAR_BUF_SIZE, decodedCharBufSize));
        decoder = charsetDecoder;
    }

    /**
     * @param input Input with %-encoded representation of characters in this instance's configured character set, e.g.
     *              "%20" for a space character
     * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters
     * @throws IllegalArgumentException     if a '%' is not followed by two hex digits
     * @throws MalformedInputException      if decoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String decode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        outputBuf.setLength(0);
        // this is almost always an underestimate of the size needed:
        // only a 4-byte encoding (which is 12 characters input) would cause this to be an overestimate
        outputBuf.ensureCapacity(input.length() / 8);
        encodedBuf.clear();
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c != '%') {
                // a literal char ends the current run of %-triples: decode what was buffered first
                handleEncodedBytes();
                outputBuf.append(c);
                continue;
            }
            if (i + 2 >= input.length()) {
                throw new IllegalArgumentException(
                        "Could not percent decode <" + input + ">: incomplete %-pair at position " + i);
            }
            // grow the byte buf if needed
            if (encodedBuf.remaining() == 0) {
                ByteBuffer largerBuf = ByteBuffer.allocate(encodedBuf.capacity() * 2);
                encodedBuf.flip();
                largerBuf.put(encodedBuf);
                encodedBuf = largerBuf;
            }
            // note that we advance i here as we consume chars
            int msBits = Character.digit(input.charAt(++i), 16);
            int lsBits = Character.digit(input.charAt(++i), 16);
            if (msBits == -1 || lsBits == -1) {
                throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">");
            }
            msBits <<= 4;
            msBits |= lsBits;
            // msBits can only have 8 bits set, so cast is safe
            encodedBuf.put((byte) msBits);
        }
        handleEncodedBytes();
        return outputBuf.toString();
    }

    /**
     * Decode any buffered encoded bytes and write them to the output buf.
     */
    private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException {
        if (encodedBuf.position() == 0) {
            // nothing to do
            return;
        }
        decoder.reset();
        CoderResult coderResult;
        // switch to reading mode
        encodedBuf.flip();
        // loop while we're filling up the decoded char buf, or there's any encoded bytes
        // decode() in practice seems to only consume bytes when it can decode an entire char...
        do {
            decodedCharBuf.clear();
            int bytePositionBefore = encodedBuf.position();
            coderResult = decoder.decode(encodedBuf, decodedCharBuf, false);
            throwIfError(coderResult);
            boolean progressed = encodedBuf.position() != bytePositionBefore || decodedCharBuf.position() != 0;
            if (coderResult == OVERFLOW && !progressed) {
                // neither input consumed nor output produced: repeating the call can never succeed
                throw new IllegalStateException("Decoder made no progress with a decoded char buffer of "
                        + decodedCharBuf.capacity() + " chars");
            }
            appendDecodedChars();
        } while (coderResult == OVERFLOW && encodedBuf.hasRemaining());
        // final decode with end-of-input flag
        decodedCharBuf.clear();
        coderResult = decoder.decode(encodedBuf, decodedCharBuf, true);
        throwIfError(coderResult);
        if (encodedBuf.hasRemaining()) {
            throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes");
        }
        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult);
        }
        appendDecodedChars();
        // we've finished the input, wrap it up
        encodedBuf.clear();
        flush();
    }

    /**
     * Must only be called when the input encoded bytes buffer is empty
     */
    private void flush() throws MalformedInputException, UnmappableCharacterException {
        CoderResult coderResult;
        decodedCharBuf.clear();
        coderResult = decoder.flush(decodedCharBuf);
        appendDecodedChars();
        throwIfError(coderResult);
        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Decoder flush resulted in " + coderResult);
        }
    }

    /**
     * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding
     * CharacterCodingException.
     *
     * @param coderResult result to check
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException {
        if (coderResult.isMalformed()) {
            throw new MalformedInputException(coderResult.length());
        }
        if (coderResult.isUnmappable()) {
            throw new UnmappableCharacterException(coderResult.length());
        }
    }

    /**
     * Flip the decoded char buf and append it to the string buf
     */
    private void appendDecodedChars() {
        decodedCharBuf.flip();
        outputBuf.append(decodedCharBuf);
    }
}

View file

@ -0,0 +1,186 @@
package org.xbib.sru.client.jdk.util;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import java.util.BitSet;
import static java.lang.Character.isHighSurrogate;
import static java.lang.Character.isLowSurrogate;
/**
 * Encodes unsafe characters as a sequence of %XX hex-encoded bytes.
 *
 * This is typically done when encoding components of URLs. See {@link UrlPercentEncoders} for pre-configured
 * PercentEncoder instances.
 *
 * An instance reuses its internal buffers between calls, so it must not be shared between threads without
 * external synchronization.
 */
public final class PercentEncoder {

    private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray();

    private final BitSet safeChars;

    private final CharsetEncoder encoder;

    /**
     * Pre-allocate a string handler to make the common case of encoding to a string faster
     */
    private final StringBuilderPercentEncoderOutputHandler stringHandler = new StringBuilderPercentEncoderOutputHandler();

    /**
     * Receives the encoded bytes of one unsafe character (or surrogate pair) at a time.
     */
    private final ByteBuffer encodedBytes;

    /**
     * Holds the single char, or the surrogate pair, that is about to be encoded.
     */
    private final CharBuffer unsafeCharsToEncode;

    /**
     * @param safeChars      the set of chars to NOT encode, stored as a bitset with the int positions corresponding to
     *                       those chars set to true. Treated as read only.
     * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder instances
     *                       across threads.
     */
    public PercentEncoder(BitSet safeChars, CharsetEncoder charsetEncoder) {
        this.safeChars = safeChars;
        this.encoder = charsetEncoder;
        // maxBytesPerChar() is a float; truncate and add one so the result is never too small
        int maxBytesPerChar = 1 + (int) encoder.maxBytesPerChar();
        // need to handle surrogate pairs, so need to be able to handle 2 chars worth of stuff at once
        encodedBytes = ByteBuffer.allocate(maxBytesPerChar * 2);
        unsafeCharsToEncode = CharBuffer.allocate(2);
    }

    /**
     * Encode the input and pass output chars to a handler.
     *
     * @param input   input string
     * @param handler handler to call on each output character
     * @throws IllegalArgumentException     if the input contains a high surrogate that is not followed by a low
     *                                      surrogate
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public void encode(CharSequence input, StringBuilderPercentEncoderOutputHandler handler) throws
            MalformedInputException, UnmappableCharacterException {
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (safeChars.get(c)) {
                handler.onOutputChar(c);
                continue;
            }
            // not a safe char
            unsafeCharsToEncode.clear();
            unsafeCharsToEncode.append(c);
            if (isHighSurrogate(c)) {
                if (input.length() > i + 1) {
                    // get the low surrogate as well
                    char lowSurrogate = input.charAt(i + 1);
                    if (isLowSurrogate(lowSurrogate)) {
                        unsafeCharsToEncode.append(lowSurrogate);
                        // the pair is encoded as one unit, so skip the low surrogate in the loop
                        i++;
                    } else {
                        throw new IllegalArgumentException(
                                "Invalid UTF-16: Char " + (i) + " is a high surrogate (\\u" + Integer
                                        .toHexString(c) + "), but char " + (i + 1) + " is not a low surrogate (\\u" + Integer
                                        .toHexString(lowSurrogate) + ")");
                    }
                } else {
                    throw new IllegalArgumentException(
                            "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer
                                    .toHexString(c) + ")");
                }
            }
            flushUnsafeCharBuffer(handler);
        }
    }

    /**
     * Encode the input and return the resulting text as a String.
     *
     * @param input input string
     * @return the input string with every character that's not in safeChars turned into its byte representation via the
     * instance's encoder and then percent-encoded
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String encode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        stringHandler.reset();
        stringHandler.ensureCapacity(input.length());
        encode(input, stringHandler);
        return stringHandler.getContents();
    }

    /**
     * Encode unsafeCharsToEncode to bytes as per charsetEncoder, then percent-encode those bytes into output.
     *
     * Side effects: unsafeCharsToEncode will be read from and cleared. encodedBytes will be cleared and written to.
     */
    private void flushUnsafeCharBuffer(StringBuilderPercentEncoderOutputHandler handler) throws MalformedInputException,
            UnmappableCharacterException {
        // need to read from the char buffer, which was most recently written to
        unsafeCharsToEncode.flip();
        encodedBytes.clear();
        encoder.reset();
        CoderResult result = encoder.encode(unsafeCharsToEncode, encodedBytes, true);
        checkResult(result);
        result = encoder.flush(encodedBytes);
        checkResult(result);
        // read contents of bytebuffer
        encodedBytes.flip();
        while (encodedBytes.hasRemaining()) {
            byte b = encodedBytes.get();
            handler.onOutputChar('%');
            // high nibble first; the mask also discards the sign extension of negative bytes
            handler.onOutputChar(HEX_CODE[b >> 4 & 0xF]);
            handler.onOutputChar(HEX_CODE[b & 0xF]);
        }
    }

    /**
     * @param result result to check
     * @throws IllegalStateException        if result is overflow
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private static void checkResult(CoderResult result) throws MalformedInputException, UnmappableCharacterException {
        if (result.isOverflow()) {
            throw new IllegalStateException("Byte buffer overflow; this should not happen.");
        }
        if (result.isMalformed()) {
            throw new MalformedInputException(result.length());
        }
        if (result.isUnmappable()) {
            throw new UnmappableCharacterException(result.length());
        }
    }

    /**
     * Collects output chars in a StringBuilder. Declared static because it needs no state of the
     * enclosing encoder, which avoids the hidden reference an inner class would keep.
     */
    private static final class StringBuilderPercentEncoderOutputHandler {

        private final StringBuilder stringBuilder;

        StringBuilderPercentEncoderOutputHandler() {
            stringBuilder = new StringBuilder();
        }

        /**
         * @return everything written since the last reset
         */
        String getContents() {
            return stringBuilder.toString();
        }

        /**
         * Discard everything written so far.
         */
        void reset() {
            stringBuilder.setLength(0);
        }

        void ensureCapacity(int length) {
            stringBuilder.ensureCapacity(length);
        }

        void onOutputChar(char c) {
            stringBuilder.append(c);
        }
    }
}

View file

@ -0,0 +1,502 @@
package org.xbib.sru.client.jdk.util;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getFragmentEncoder;
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getMatrixEncoder;
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getPathEncoder;
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getQueryParamEncoder;
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getRegNameEncoder;
import static org.xbib.sru.client.jdk.util.UrlPercentEncoders.getUnstructuredQueryEncoder;
/**
 * Builder for urls with url-encoding applied to path, query param, etc.
 *
 * Escaping rules are from RFC 3986, RFC 1738 and the <a href="http://www.w3.org/TR/html401/interact/forms.html#form-content-type">HTML 4 spec</a>.
 * This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make
 * HTTP-useful URLs.
 *
 * Each builder owns its own percent encoders, so an instance must not be shared between threads without external
 * synchronization.
 */
public final class UrlBuilder {

    /**
     * Bracketed IPv6 address in compressed form (containing "::"), cribbed from <a href="http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings">StackOverflow</a>
     */
    private static final Pattern IPV6_PATTERN = Pattern
            .compile(
                    "\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z");

    /**
     * Bracketed IPv6 address written out in full: eight hex groups and no "::" compression. The compressed pattern
     * requires a "::" and therefore never matches this form.
     */
    private static final Pattern IPV6_FULL_PATTERN = Pattern
            .compile("\\A\\[[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4}){7}\\]\\z");

    /**
     * IPv4 dotted quad
     */
    private static final Pattern IPV4_PATTERN = Pattern
            .compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z");

    private final String scheme;

    private final String host;

    private final Integer port;

    private final List<Pair<String, String>> queryParams = new ArrayList<>();

    /**
     * If this is non-null, queryParams must be empty, and vice versa.
     */
    private String unstructuredQuery;

    private final List<PathSegment> pathSegments = new ArrayList<>();

    private final PercentEncoder pathEncoder = getPathEncoder();

    private final PercentEncoder regNameEncoder = getRegNameEncoder();

    private final PercentEncoder matrixEncoder = getMatrixEncoder();

    private final PercentEncoder queryParamEncoder = getQueryParamEncoder();

    private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder();

    private final PercentEncoder fragmentEncoder = getFragmentEncoder();

    private String fragment;

    private boolean forceTrailingSlash = false;

    /**
     * Create a URL with UTF-8 encoding.
     *
     * @param scheme scheme (e.g. http)
     * @param host   host (e.g. foo.com or 1.2.3.4 or [::1])
     * @param port   null or a positive integer
     */
    private UrlBuilder(String scheme, String host, Integer port) {
        this.host = host;
        this.scheme = scheme;
        this.port = port;
    }

    /**
     * Create a URL with a null port and UTF-8 encoding.
     *
     * @param scheme scheme (e.g. http)
     * @param host   host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
     *               ([::1]), excluding IPvFuture since no one uses that in practice
     * @return a url builder
     * @see UrlBuilder#forHost(String scheme, String host, int port)
     */
    public static UrlBuilder forHost(String scheme, String host) {
        return new UrlBuilder(scheme, host, null);
    }

    /**
     * @param scheme scheme (e.g. http)
     * @param host   host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
     *               ([::1]), excluding IPvFuture since no one uses that in practice
     * @param port   port
     * @return a url builder
     */
    public static UrlBuilder forHost(String scheme, String host, int port) {
        return new UrlBuilder(scheme, host, port);
    }

    /**
     * Parses the string into a {@link URL} and calls {@link UrlBuilder#fromUrl(URL)}.
     *
     * @param urlSpec url to initialize builder with
     * @return a UrlBuilder containing the host, path, etc. from the url
     * @throws CharacterCodingException if char decoding fails
     * @throws MalformedURLException    if the string cannot be parsed as a URL
     */
    public static UrlBuilder fromUrl(String urlSpec) throws CharacterCodingException, MalformedURLException {
        return fromUrl(new URL(urlSpec));
    }

    /**
     * Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the
     * query string apply.
     *
     * @param url url to initialize builder with
     * @return a UrlBuilder containing the host, path, etc. from the url
     * @throws CharacterCodingException if char decoding fails
     * @see UrlBuilder#fromUrl(URL, CharsetDecoder)
     */
    public static UrlBuilder fromUrl(URL url) throws CharacterCodingException {
        return fromUrl(url, StandardCharsets.UTF_8.newDecoder());
    }

    /**
     * Create a UrlBuilder initialized with the contents of a {@link URL}.
     *
     * The query string will be parsed into HTML4 query params if it can be separated into a
     * <code>&amp;</code>-separated sequence of <code>key=value</code> pairs (the value may be empty). The sequence of
     * query params can then be appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The
     * concept of query params is only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal
     * to have a query string that is in some other form. To represent this case, if the aforementioned param-parsing
     * attempt fails, the query string will be treated as just a monolithic, unstructured, string. In this case, calls
     * to {@link UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and
     * only calls to {@link UrlBuilder#unstructuredQuery(String)}, which replaces the entire query string, are allowed.
     *
     * @param url            url to initialize builder with
     * @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8)
     * @return a UrlBuilder containing the host, path, etc. from the url
     * @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to
     *                                  report errors
     * @see UrlBuilder#fromUrl(URL)
     */
    public static UrlBuilder fromUrl(URL url, CharsetDecoder charsetDecoder) throws
            CharacterCodingException {
        PercentDecoder decoder = new PercentDecoder(charsetDecoder);
        // reg names must be encoded UTF-8
        PercentDecoder regNameDecoder;
        if (charsetDecoder.charset().equals(StandardCharsets.UTF_8)) {
            regNameDecoder = decoder;
        } else {
            regNameDecoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder());
        }
        // URL reports a missing port as -1; the builder represents it as null
        int rawPort = url.getPort();
        Integer port = rawPort == -1 ? null : Integer.valueOf(rawPort);
        UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port);
        buildFromPath(builder, decoder, url);
        buildFromQuery(builder, decoder, url);
        if (url.getRef() != null) {
            builder.fragment(decoder.decode(url.getRef()));
        }
        return builder;
    }

    /**
     * Add a path segment.
     *
     * @param segment a path segment
     * @return this
     */
    public UrlBuilder pathSegment(String segment) {
        pathSegments.add(new PathSegment(segment));
        return this;
    }

    /**
     * Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}.
     *
     * @param segments path segments
     * @return this
     */
    public UrlBuilder pathSegments(String... segments) {
        for (String segment : segments) {
            pathSegment(segment);
        }
        return this;
    }

    /**
     * Add an HTML query parameter. Query parameters will be encoded in the order added.
     *
     * Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by
     * <a href="http://www.w3.org/TR/html401/interact/forms.html#form-content-type">HTML 4</a>.
     *
     * If you use this method to build a query string, or created this builder from a url with a query string that can
     * successfully be parsed into query param pairs, you cannot subsequently use {@link
     * UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}.
     *
     * @param name  param name
     * @param value param value
     * @return this
     * @throws IllegalStateException if an unstructured query has already been set
     */
    public UrlBuilder queryParam(String name, String value) {
        if (unstructuredQuery != null) {
            throw new IllegalStateException(
                    "Cannot call queryParam() when this already has an unstructured query specified");
        }
        queryParams.add(Pair.of(name, value));
        return this;
    }

    /**
     * Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that
     * is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite
     * that query.
     *
     * If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you
     * cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL,
     * CharsetDecoder)}.
     *
     * @param query Complete URI query, as specified by <a href="https://tools.ietf.org/html/rfc3986#section-3.4">RFC 3986</a>.
     * @return this
     * @throws IllegalStateException if query params have already been added
     */
    public UrlBuilder unstructuredQuery(String query) {
        if (!queryParams.isEmpty()) {
            throw new IllegalStateException(
                    "Cannot call unstructuredQuery() when this already has queryParam pairs specified");
        }
        unstructuredQuery = query;
        return this;
    }

    /**
     * Clear the unstructured query and any query params.
     *
     * Since the query / query param situation is a little complicated, this method will let you remove all query
     * information and start again from scratch. This may be useful when taking an existing url, parsing it into a
     * builder, and then re-doing its query params, for instance.
     *
     * @return this
     */
    public UrlBuilder clearQuery() {
        queryParams.clear();
        unstructuredQuery = null;
        return this;
    }

    /**
     * Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the
     * root. Matrix params will be encoded in the order added.
     *
     * @param name  param name
     * @param value param value
     * @return this
     */
    public UrlBuilder matrixParam(String name, String value) {
        if (pathSegments.isEmpty()) {
            // create an empty path segment to represent a matrix param applied to the root
            pathSegment("");
        }
        PathSegment seg = pathSegments.get(pathSegments.size() - 1);
        seg.matrixParams.add(Pair.of(name, value));
        return this;
    }

    /**
     * Set the fragment.
     *
     * @param fragment fragment string
     * @return this
     */
    public UrlBuilder fragment(String fragment) {
        this.fragment = fragment;
        return this;
    }

    /**
     * Force the generated URL to have a trailing slash at the end of the path.
     *
     * @return this
     */
    public UrlBuilder forceTrailingSlash() {
        forceTrailingSlash = true;
        return this;
    }

    /**
     * Encode the current builder state into a {@link URL}.
     *
     * @return the URL parsed from {@link #toUrlString()}
     * @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
     * @throws MalformedURLException    if the encoded string cannot be parsed as a URL
     */
    public URL build() throws CharacterCodingException, MalformedURLException {
        return new URL(toUrlString());
    }

    /**
     * Encode the current builder state into a URL string.
     *
     * @return a well-formed URL string
     * @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
     */
    public String toUrlString() throws CharacterCodingException {
        StringBuilder buf = new StringBuilder();
        buf.append(scheme);
        buf.append("://");
        buf.append(encodeHost(host));
        if (port != null) {
            buf.append(':');
            buf.append(port);
        }
        for (PathSegment pathSegment : pathSegments) {
            buf.append('/');
            buf.append(pathEncoder.encode(pathSegment.segment));
            for (Pair<String, String> matrixParam : pathSegment.matrixParams) {
                buf.append(';');
                buf.append(matrixEncoder.encode(matrixParam.getKey()));
                buf.append('=');
                buf.append(matrixEncoder.encode(matrixParam.getValue()));
            }
        }
        if (forceTrailingSlash) {
            buf.append('/');
        }
        if (!queryParams.isEmpty()) {
            buf.append("?");
            Iterator<Pair<String, String>> qpIter = queryParams.iterator();
            while (qpIter.hasNext()) {
                Pair<String, String> queryParam = qpIter.next();
                buf.append(queryParamEncoder.encode(queryParam.getKey()));
                buf.append('=');
                buf.append(queryParamEncoder.encode(queryParam.getValue()));
                if (qpIter.hasNext()) {
                    buf.append('&');
                }
            }
        } else if (unstructuredQuery != null) {
            buf.append("?");
            buf.append(unstructuredQueryEncoder.encode(unstructuredQuery));
        }
        if (fragment != null) {
            buf.append('#');
            buf.append(fragmentEncoder.encode(fragment));
        }
        return buf.toString();
    }

    /**
     * Populate a url builder based on the query of a url
     *
     * @param builder builder
     * @param decoder decoder
     * @param url     url
     * @throws CharacterCodingException if decoding fails and the decoder is configured to report errors
     */
    private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws
            CharacterCodingException {
        String q = url.getQuery();
        if (q == null) {
            return;
        }
        // try to parse into &-separated key=value pairs
        List<Pair<String, String>> pairs = new ArrayList<>();
        boolean parseOk = true;
        for (String queryChunk : q.split("&")) {
            // limit -1 keeps trailing empty strings, so "key=" is a pair with an empty value
            // instead of forcing the whole query to be treated as unstructured
            String[] queryParamChunks = queryChunk.split("=", -1);
            if (queryParamChunks.length != 2) {
                parseOk = false;
                break;
            }
            pairs.add(Pair.of(decoder.decode(queryParamChunks[0]),
                    decoder.decode(queryParamChunks[1])));
        }
        if (parseOk) {
            for (Pair<String, String> pair : pairs) {
                builder.queryParam(pair.getKey(), pair.getValue());
            }
        } else {
            builder.unstructuredQuery(decoder.decode(q));
        }
    }

    /**
     * Populate the path segments of a url builder from a url.
     *
     * Empty path chunks are skipped, so doubled slashes collapse and a trailing slash on the url's path is not
     * carried over to the builder.
     *
     * @param builder builder
     * @param decoder decoder
     * @param url     url
     * @throws CharacterCodingException if decoding fails and the decoder is configured to report errors
     */
    private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws
            CharacterCodingException {
        for (String pathChunk : url.getPath().split("/")) {
            if (pathChunk.equals("")) {
                continue;
            }
            if (pathChunk.charAt(0) == ';') {
                builder.pathSegment("");
                // empty path segment, but matrix params
                for (String matrixChunk : pathChunk.substring(1).split(";")) {
                    buildFromMatrixParamChunk(decoder, builder, matrixChunk);
                }
                continue;
            }
            // otherwise, path chunk is non empty and does not start with a ';'
            String[] matrixChunks = pathChunk.split(";");
            // first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will
            // not be included in the final url.
            builder.pathSegment(decoder.decode(matrixChunks[0]));
            // if there any other chunks, they're matrix param pairs
            for (int i = 1; i < matrixChunks.length; i++) {
                buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]);
            }
        }
    }

    /**
     * Decode one <code>name=value</code> matrix param chunk and add it to the builder's last path segment.
     *
     * @param decoder         decoder
     * @param ub              builder
     * @param pathMatrixChunk the chunk between two ';' delimiters
     * @throws CharacterCodingException if decoding fails and the decoder is configured to report errors
     * @throws IllegalArgumentException if the chunk does not contain exactly one '='
     */
    private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws
            CharacterCodingException {
        // limit -1 keeps a trailing empty string, so "name=" is accepted as an empty value
        String[] mtxPair = pathMatrixChunk.split("=", -1);
        if (mtxPair.length != 2) {
            throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">");
        }
        String mtxName = mtxPair[0];
        String mtxVal = mtxPair[1];
        ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal));
    }

    /**
     * @param host original host string
     * @return host encoded as in RFC 3986 section 3.2.2
     */
    private String encodeHost(String host) throws CharacterCodingException {
        // matching order: IP-literal, IPv4, reg-name
        if (IPV4_PATTERN.matcher(host).matches()
                || IPV6_PATTERN.matcher(host).matches()
                || IPV6_FULL_PATTERN.matcher(host).matches()) {
            // IP addresses are emitted verbatim; percent-encoding the brackets and colons would break them
            return host;
        }
        // it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL)
        return regNameEncoder.encode(host);
    }

    /**
     * Bundle of a path segment name and any associated matrix params.
     */
    private static class PathSegment {

        private final String segment;

        private final List<Pair<String, String>> matrixParams = new ArrayList<>();

        PathSegment(String segment) {
            this.segment = segment;
        }
    }

    /**
     * Immutable key/value holder used for query params and matrix params.
     *
     * @param <K> key type
     * @param <V> value type
     */
    static class Pair<K, V> {

        final K key;

        final V value;

        Pair(K key, V value) {
            this.key = key;
            this.value = value;
        }

        static <K, V> Pair<K, V> of(K key, V value) {
            return new Pair<>(key, value);
        }

        K getKey() {
            return key;
        }

        V getValue() {
            return value;
        }
    }
}

View file

@ -0,0 +1,164 @@
package org.xbib.sru.client.jdk.util;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import static java.nio.charset.CodingErrorAction.REPLACE;
/**
 * Factory for {@link PercentEncoder} instances configured for the individual parts of a URL.
 *
 * See RFC 3986, RFC 1738 and <a href="http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding">...</a>.
 */
public final class UrlPercentEncoders {

    /** Safe chars for an RFC 3986 'reg-name'. */
    private static final BitSet REG_NAME_BIT_SET = new BitSet();

    /** Safe chars for a path segment. */
    private static final BitSet PATH_BIT_SET = new BitSet();

    /** Safe chars for the name or value of a matrix param. */
    private static final BitSet MATRIX_BIT_SET = new BitSet();

    /** Safe chars for a query string that is not split into key=value pairs. */
    private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet();

    /** Safe chars for the name or value of an HTML query param. */
    private static final BitSet QUERY_PARAM_BIT_SET = new BitSet();

    /** Safe chars for a fragment. */
    private static final BitSet FRAGMENT_BIT_SET = new BitSet();

    static {
        // RFC 3986 'reg-name'. Not very aggressive: DNS-illegal names can pass, but the result is
        // at least URI-compliant even if it is not HTTP URL-compliant.
        allowUnreserved(REG_NAME_BIT_SET);
        allowSubDelims(REG_NAME_BIT_SET);

        // RFC 3986 'pchar' without the delimiter that starts the matrix section
        allowPChar(PATH_BIT_SET);
        forbid(PATH_BIT_SET, ";");

        // 'pchar' without the delimiters of HTTP matrix params (RFC 1738 S3.3); the other
        // reserved chars ('/' and '?') are not part of 'pchar' in the first place
        allowPChar(MATRIX_BIT_SET);
        forbid(MATRIX_BIT_SET, ";=");

        // RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 lets '+'
        // mean a space in a query, so '+' must not be left as-is
        allowPCharSlashAndQuestionMark(UNSTRUCTURED_QUERY_BIT_SET);
        forbid(UNSTRUCTURED_QUERY_BIT_SET, "+");

        // HTML4 query params are stricter still: their own delimiters must be encoded so that
        // key=value pairs stay intact
        QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET);
        forbid(QUERY_PARAM_BIT_SET, "=&");

        // RFC 3986 'fragment'
        allowPCharSlashAndQuestionMark(FRAGMENT_BIT_SET);
    }

    private UrlPercentEncoders() {
    }

    /**
     * @return a new encoder for RFC 3986 reg-names
     */
    public static PercentEncoder getRegNameEncoder() {
        return newUtf8Encoder(REG_NAME_BIT_SET);
    }

    /**
     * @return a new encoder for path segments
     */
    public static PercentEncoder getPathEncoder() {
        return newUtf8Encoder(PATH_BIT_SET);
    }

    /**
     * @return a new encoder for matrix param names and values
     */
    public static PercentEncoder getMatrixEncoder() {
        return newUtf8Encoder(MATRIX_BIT_SET);
    }

    /**
     * @return a new encoder for query strings that are not made of key=value pairs
     */
    public static PercentEncoder getUnstructuredQueryEncoder() {
        return newUtf8Encoder(UNSTRUCTURED_QUERY_BIT_SET);
    }

    /**
     * @return a new encoder for query param names and values
     */
    public static PercentEncoder getQueryParamEncoder() {
        return newUtf8Encoder(QUERY_PARAM_BIT_SET);
    }

    /**
     * @return a new encoder for fragments
     */
    public static PercentEncoder getFragmentEncoder() {
        return newUtf8Encoder(FRAGMENT_BIT_SET);
    }

    /**
     * Create an encoder around a fresh UTF-8 charset encoder that replaces malformed input and
     * unmappable characters instead of reporting them.
     *
     * @param safeChars chars the encoder leaves untouched
     * @return a new encoder
     */
    private static PercentEncoder newUtf8Encoder(BitSet safeChars) {
        return new PercentEncoder(safeChars, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
                .onUnmappableCharacter(REPLACE));
    }

    /**
     * Add code points for 'pchar' chars plus '/' and '?', which is the char set shared by
     * 'query' and 'fragment'.
     *
     * @param bs bitset to add codepoints to
     */
    private static void allowPCharSlashAndQuestionMark(BitSet bs) {
        allowPChar(bs);
        allow(bs, "/?");
    }

    /**
     * Add code points for 'pchar' chars.
     *
     * @param bs bitset to add codepoints to
     */
    private static void allowPChar(BitSet bs) {
        allowUnreserved(bs);
        allowSubDelims(bs);
        allow(bs, ":@");
    }

    /**
     * Add codepoints for 'unreserved' chars
     *
     * @param bs bitset to add codepoints to
     */
    private static void allowUnreserved(BitSet bs) {
        allowRange(bs, 'a', 'z');
        allowRange(bs, 'A', 'Z');
        allowRange(bs, '0', '9');
        allow(bs, "-._~");
    }

    /**
     * Add codepoints for 'sub-delims' chars
     *
     * @param bs bitset to add codepoints to
     */
    private static void allowSubDelims(BitSet bs) {
        allow(bs, "!$&'()*+,;=");
    }

    /**
     * Mark every char between first and last, both inclusive, as safe.
     */
    private static void allowRange(BitSet bs, char first, char last) {
        for (int codePoint = first; codePoint <= last; codePoint++) {
            bs.set(codePoint);
        }
    }

    /**
     * Mark every char of the given string as safe.
     */
    private static void allow(BitSet bs, String chars) {
        for (int i = 0; i < chars.length(); i++) {
            bs.set((int) chars.charAt(i));
        }
    }

    /**
     * Mark every char of the given string as unsafe.
     */
    private static void forbid(BitSet bs, String chars) {
        for (int i = 0; i < chars.length(); i++) {
            bs.clear((int) chars.charAt(i));
        }
    }
}

View file

@ -1,8 +1,5 @@
package org.xbib.z3950.api;
/**
*
*/
@FunctionalInterface
public interface InitListener {

View file

@ -1,8 +1,5 @@
package org.xbib.z3950.api;
/**
*
*/
@FunctionalInterface
public interface RecordListener {

View file

@ -2,9 +2,6 @@ package org.xbib.z3950.api;
import org.xbib.asn1.BEREncoding;
/**
*
*/
@FunctionalInterface
public interface ScanListener {

View file

@ -2,9 +2,6 @@ package org.xbib.z3950.api;
import java.io.IOException;
/**
*
*/
@FunctionalInterface
public interface SearchListener {

View file

@ -31,7 +31,7 @@ class LVITest {
//String query = "@attr 1=12 \"(DE-101)1016677359\"" // record ID plus prefix OK!
//String query = "@attr 1=12 \"(DE-600)2635378-7\"" // ZDB ID plus prefix NOT OK!
//String query = "@attr 1=1016 \"2020\"" // any OK!
//String query = "@attr 1=1052 12-7" // ZDB-ID OK!
String query = "@attr 1=1052 12-7" // ZDB-ID OK!
String preferredRecordSyntax = "marc21"
int from = 1
int size = 1