add xbib net dependency, update to Gradle 4.1, add some convenience methods
This commit is contained in:
parent
73afbd806f
commit
14643b140f
46 changed files with 347 additions and 2019 deletions
39
build.gradle
39
build.gradle
|
@ -1,16 +1,25 @@
|
|||
|
||||
plugins {
|
||||
id "org.sonarqube" version "2.2"
|
||||
id "org.ajoberstar.github-pages" version "1.6.0-rc.1"
|
||||
id "org.xbib.gradle.plugin.jbake" version "1.2.1"
|
||||
id "org.sonarqube" version "2.5"
|
||||
id "org.xbib.gradle.plugin.asciidoctor" version "1.5.4.1.0"
|
||||
id "io.codearte.nexus-staging" version "0.7.0"
|
||||
}
|
||||
|
||||
ext {
|
||||
versions = [
|
||||
'jackson' : '2.8.4'
|
||||
]
|
||||
}
|
||||
printf "Host: %s\nOS: %s %s %s\nJVM: %s %s %s %s\nGroovy: %s\nGradle: %s\n" +
|
||||
"Build: group: ${project.group} name: ${project.name} version: ${project.version}\n",
|
||||
InetAddress.getLocalHost(),
|
||||
System.getProperty("os.name"),
|
||||
System.getProperty("os.arch"),
|
||||
System.getProperty("os.version"),
|
||||
System.getProperty("java.version"),
|
||||
System.getProperty("java.vm.version"),
|
||||
System.getProperty("java.vm.vendor"),
|
||||
System.getProperty("java.vm.name"),
|
||||
GroovySystem.getVersion(),
|
||||
gradle.gradleVersion
|
||||
|
||||
apply plugin: 'build-dashboard'
|
||||
apply plugin: "io.codearte.nexus-staging"
|
||||
|
||||
allprojects {
|
||||
|
||||
|
@ -21,18 +30,20 @@ allprojects {
|
|||
apply plugin: 'pmd'
|
||||
apply plugin: 'checkstyle'
|
||||
apply plugin: "jacoco"
|
||||
apply plugin: 'org.xbib.gradle.plugin.asciidoctor'
|
||||
|
||||
repositories {
|
||||
mavenCentral()
|
||||
}
|
||||
|
||||
configurations {
|
||||
asciidoclet
|
||||
wagon
|
||||
}
|
||||
|
||||
dependencies {
|
||||
testCompile 'junit:junit:4.12'
|
||||
wagon 'org.apache.maven.wagon:wagon-ssh-external:2.10'
|
||||
wagon 'org.apache.maven.wagon:wagon-ssh:2.12'
|
||||
}
|
||||
|
||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||
|
@ -43,6 +54,12 @@ allprojects {
|
|||
options.compilerArgs << "-Xlint:all" << "-profile" << "compact1"
|
||||
}
|
||||
|
||||
jar {
|
||||
manifest {
|
||||
attributes('Implementation-Version': project.version)
|
||||
}
|
||||
}
|
||||
|
||||
test {
|
||||
testLogging {
|
||||
showStandardStreams = false
|
||||
|
@ -50,6 +67,10 @@ allprojects {
|
|||
}
|
||||
}
|
||||
|
||||
clean {
|
||||
delete 'out'
|
||||
}
|
||||
|
||||
task sourcesJar(type: Jar, dependsOn: classes) {
|
||||
classifier 'sources'
|
||||
from sourceSets.main.allSource
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
dependencies {
|
||||
compile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
|
||||
compile "com.fasterxml.jackson.core:jackson-core:${project.property('jackson.version')}"
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@ package org.xbib.content;
|
|||
|
||||
import org.xbib.content.io.BytesReference;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.Flushable;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.math.BigDecimal;
|
||||
|
@ -10,7 +12,7 @@ import java.math.BigInteger;
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public interface XContentGenerator {
|
||||
public interface XContentGenerator extends Flushable, Closeable {
|
||||
|
||||
XContent content();
|
||||
|
||||
|
@ -115,7 +117,4 @@ public interface XContentGenerator {
|
|||
|
||||
void copyCurrentStructure(XContentParser parser) throws IOException;
|
||||
|
||||
void flush() throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
||||
|
|
|
@ -40,6 +40,14 @@ public class BytesArray implements BytesReference {
|
|||
this.length = length;
|
||||
}
|
||||
|
||||
public void write(byte[] b) {
|
||||
byte[] c = new byte[length + b.length];
|
||||
System.arraycopy(bytes, 0, c, 0, length);
|
||||
System.arraycopy(b, 0, c, bytes.length, b.length);
|
||||
this.bytes = c;
|
||||
this.offset = 0;
|
||||
this.length = c.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte get(int index) {
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
package org.xbib.content.settings;
|
||||
|
||||
import org.xbib.content.XContent;
|
||||
import org.xbib.content.XContentGenerator;
|
||||
import org.xbib.content.XContentParser;
|
||||
import org.xbib.content.io.BytesReference;
|
||||
import org.xbib.content.io.BytesStreamOutput;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -24,6 +27,32 @@ public abstract class AbstractSettingsLoader implements SettingsLoader {
|
|||
}
|
||||
}
|
||||
|
||||
public Map<String, String> load(BytesReference bytesReference) throws IOException {
|
||||
try (XContentParser parser = content().createParser(bytesReference)) {
|
||||
return load(parser);
|
||||
}
|
||||
}
|
||||
|
||||
public String flatMapAsString(BytesReference bytesReference) throws IOException {
|
||||
try (XContentParser parser = content().createParser(bytesReference);
|
||||
BytesStreamOutput bytesStreamOutput = new BytesStreamOutput();
|
||||
XContentGenerator generator = content().createGenerator(bytesStreamOutput)) {
|
||||
generator.writeStartObject();
|
||||
for (Map.Entry<String, String> entry : load(parser).entrySet()) {
|
||||
generator.writeFieldName(entry.getKey());
|
||||
String value = entry.getValue();
|
||||
if (value == null) {
|
||||
generator.writeNull();
|
||||
} else {
|
||||
generator.writeString(value);
|
||||
}
|
||||
}
|
||||
generator.writeEndObject();
|
||||
generator.flush();
|
||||
return bytesStreamOutput.bytes().toUtf8();
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String, String> load(XContentParser xContentParser) throws IOException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Map<String, String> map = new HashMap<>();
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
package org.xbib.content.settings;
|
||||
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.xbib.content.XContentBuilder;
|
||||
import org.xbib.content.XContentHelper;
|
||||
import org.xbib.content.io.BytesArray;
|
||||
import org.xbib.content.io.BytesReference;
|
||||
import org.xbib.content.json.JsonSettingsLoader;
|
||||
import org.xbib.content.json.JsonXContent;
|
||||
|
||||
|
@ -123,4 +123,12 @@ public class SettingsTest extends Assert {
|
|||
assertEquals("{\"a.b\":\"c\"}", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFlatMapAsString() throws IOException {
|
||||
String s = "{\"a\":{\"b\":\"c\"}}";
|
||||
BytesReference ref = new BytesArray(s.getBytes(StandardCharsets.UTF_8));
|
||||
JsonSettingsLoader loader = new JsonSettingsLoader();
|
||||
String result = loader.flatMapAsString(ref);
|
||||
assertEquals("{\"a.b\":\"c\"}", result);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
dependencies {
|
||||
compile "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}"
|
||||
compile "com.fasterxml.jackson.core:jackson-databind:${project.property('jackson.version')}"
|
||||
testCompile('junit:junit:4.12') {
|
||||
exclude group: 'org.hamcrest'
|
||||
}
|
||||
|
@ -7,4 +7,4 @@ dependencies {
|
|||
exclude group: 'org.hamcrest'
|
||||
}
|
||||
testCompile 'org.hamcrest:hamcrest-all:1.3'
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import org.xbib.content.rdf.RdfContentParams;
|
|||
import org.xbib.content.rdf.Resource;
|
||||
import org.xbib.content.rdf.internal.DefaultAnonymousResource;
|
||||
import org.xbib.content.resource.IRI;
|
||||
import org.xbib.content.xml.util.XMLUtil;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
@ -185,7 +186,7 @@ public abstract class AbstractXmlHandler<P extends RdfContentParams>
|
|||
}
|
||||
|
||||
public String content() {
|
||||
String s = content.toString().trim();
|
||||
String s = XMLUtil.sanitizeToLineFeed(content.toString()).trim();
|
||||
return s.length() > 0 ? s : null;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ import org.xbib.content.rdf.RdfContentParams;
|
|||
import org.xbib.content.rdf.RdfContentParser;
|
||||
import org.xbib.content.rdf.RdfContentType;
|
||||
import org.xbib.content.rdf.StandardRdfContentType;
|
||||
import org.xbib.content.rdf.util.NormalizeEolFilter;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
|
@ -37,7 +38,7 @@ public class XmlContentParser<P extends RdfContentParams> implements RdfContentP
|
|||
}
|
||||
|
||||
public XmlContentParser(Reader reader) {
|
||||
this.reader = reader;
|
||||
this.reader = new NormalizeEolFilter(reader, System.getProperty("line.separator"), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
package org.xbib.content.rdf.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class NormalizeEolFilter extends SimpleFilterReader {
|
||||
|
||||
private boolean previousWasEOL;
|
||||
|
||||
private boolean fixLast;
|
||||
|
||||
private int normalizedEOL = 0;
|
||||
|
||||
private char[] eol = null;
|
||||
|
||||
public NormalizeEolFilter(Reader in, String eolString, boolean fixLast) {
|
||||
super(in);
|
||||
eol = eolString.toCharArray();
|
||||
this.fixLast = fixLast;
|
||||
}
|
||||
|
||||
public int read() throws IOException {
|
||||
int thisChar = super.read();
|
||||
if (normalizedEOL == 0) {
|
||||
int numEOL = 0;
|
||||
boolean atEnd = false;
|
||||
switch (thisChar) {
|
||||
case '\u001A':
|
||||
int c = super.read();
|
||||
if (c == -1) {
|
||||
atEnd = true;
|
||||
if (fixLast && !previousWasEOL) {
|
||||
numEOL = 1;
|
||||
push(thisChar);
|
||||
}
|
||||
} else {
|
||||
push(c);
|
||||
}
|
||||
break;
|
||||
case -1:
|
||||
atEnd = true;
|
||||
if (fixLast && !previousWasEOL) {
|
||||
numEOL = 1;
|
||||
}
|
||||
break;
|
||||
case '\n':
|
||||
numEOL = 1;
|
||||
break;
|
||||
case '\r':
|
||||
numEOL = 1;
|
||||
int c1 = super.read();
|
||||
int c2 = super.read();
|
||||
if (c1 != '\r' || c2 != '\n') {
|
||||
if (c1 == '\r') {
|
||||
numEOL = 2;
|
||||
push(c2);
|
||||
} else if (c1 == '\n') {
|
||||
push(c2);
|
||||
} else {
|
||||
push(c2);
|
||||
push(c1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (numEOL > 0) {
|
||||
while (numEOL-- > 0) {
|
||||
push(eol);
|
||||
normalizedEOL += eol.length;
|
||||
}
|
||||
previousWasEOL = true;
|
||||
thisChar = read();
|
||||
} else if (!atEnd) {
|
||||
previousWasEOL = false;
|
||||
}
|
||||
} else {
|
||||
normalizedEOL--;
|
||||
}
|
||||
return thisChar;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
package org.xbib.content.rdf.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
 * This filter reader redirects all read I/O methods through its own read() method.
 *
 * <p>In addition it keeps a growable stack of pushed-back characters. {@link #read()}
 * returns pushed-back characters, most recently pushed first, before it reads from the
 * wrapped reader. {@link #mark(int)}, {@link #reset()} and {@link #skip(long)} are passed
 * straight to the wrapped reader and do not take pushed-back characters into account.
 * No synchronization is performed.</p>
 */
public class SimpleFilterReader extends Reader {

    private static final int PREEMPT_BUFFER_LENGTH = 16;

    /** The wrapped reader. */
    private final Reader in;

    /** Stack of pushed-back characters; the top of the stack is at {@code preemptIndex - 1}. */
    private int[] preempt = new int[PREEMPT_BUFFER_LENGTH];

    /** Number of characters currently on the push-back stack. */
    private int preemptIndex = 0;

    /**
     * Creates a filter reader on the given reader.
     *
     * @param in the reader to read from
     */
    public SimpleFilterReader(Reader in) {
        this.in = in;
    }

    /**
     * Pushes a character back so that it is returned by the next {@link #read()}.
     *
     * @param c the character to push back
     */
    public void push(char c) {
        push((int) c);
    }

    /**
     * Pushes a value back so that it is returned by the next {@link #read()}.
     * The value is stored as given, so -1 may be pushed back as well.
     *
     * @param c the value to push back
     */
    public void push(int c) {
        // Grow before storing. Growing in a catch block for ArrayIndexOutOfBoundsException
        // does not work, because the index has already been incremented when the exception
        // is thrown; one slot would be skipped and later read back as a spurious 0.
        if (preemptIndex == preempt.length) {
            int[] grown = new int[preempt.length * 2];
            System.arraycopy(preempt, 0, grown, 0, preempt.length);
            preempt = grown;
        }
        preempt[preemptIndex++] = c;
    }

    /**
     * Pushes a range of characters back so that they are read again in array order.
     *
     * @param cs the characters
     * @param start index of the first character to push back
     * @param length number of characters to push back
     */
    public void push(char[] cs, int start, int length) {
        // Push in reverse so that cs[start] ends up on top of the stack.
        for (int i = start + length - 1; i >= start; i--) {
            push(cs[i]);
        }
    }

    /**
     * Pushes all given characters back so that they are read again in array order.
     *
     * @param cs the characters
     */
    public void push(char[] cs) {
        push(cs, 0, cs.length);
    }

    /**
     * Returns the most recently pushed-back value if there is one, otherwise the next
     * character of the wrapped reader.
     *
     * @return the character read, or -1 at the end of the stream
     * @throws IOException if the wrapped reader fails
     */
    @Override
    public int read() throws IOException {
        return preemptIndex > 0 ? preempt[--preemptIndex] : in.read();
    }

    @Override
    public void close() throws IOException {
        in.close();
    }

    @Override
    public void reset() throws IOException {
        in.reset();
    }

    @Override
    public boolean markSupported() {
        return in.markSupported();
    }

    /**
     * Tells whether a read will not block: either a pushed-back value is available
     * or the wrapped reader is ready.
     */
    @Override
    public boolean ready() throws IOException {
        return preemptIndex > 0 || in.ready();
    }

    @Override
    public void mark(int i) throws IOException {
        in.mark(i);
    }

    @Override
    public long skip(long i) throws IOException {
        return in.skip(i);
    }

    @Override
    public int read(char[] buf) throws IOException {
        return read(buf, 0, buf.length);
    }

    /**
     * Fills the buffer by repeatedly calling {@link #read()}, so that subclasses only
     * have to override the single-character read.
     *
     * @return the number of characters stored, or -1 if the end of the stream was hit
     *         before any character could be stored
     */
    @Override
    public int read(char[] buf, int start, int length) throws IOException {
        int count = 0;
        int c = 0;
        while (length-- > 0 && (c = this.read()) != -1) {
            buf[start++] = (char) c;
            count++;
        }
        // Report end of stream only if nothing at all was read.
        return (count == 0 && c == -1) ? -1 : count;
    }
}
|
|
@ -9,12 +9,12 @@ import org.xbib.content.rdf.io.IOTests;
|
|||
import org.xbib.content.rdf.io.turtle.TurtleContentParams;
|
||||
import org.xbib.content.resource.IRI;
|
||||
import org.xbib.content.resource.IRINamespaceContext;
|
||||
import org.xbib.content.resource.text.CharUtils;
|
||||
import org.xbib.content.resource.url.UrlEncoding;
|
||||
import org.xbib.helper.StreamTester;
|
||||
import org.xbib.net.PercentEncoders;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
|
@ -50,8 +50,8 @@ public class OAITest extends StreamTester {
|
|||
if ("identifier".equals(name.getLocalPart())) {
|
||||
// make sure we can build an opaque IRI, whatever is out there
|
||||
try {
|
||||
getResource().setId(IRI.create("id:"
|
||||
+ UrlEncoding.encode(value, CharUtils.Profile.SCHEMESPECIFICPART.filter())));
|
||||
getResource().setId(IRI.create("id:" +
|
||||
PercentEncoders.getRegNameEncoder(StandardCharsets.UTF_8).encode(value)));
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
}
|
||||
|
@ -80,8 +80,7 @@ public class OAITest extends StreamTester {
|
|||
.setDefaultNamespace("oai", "http://www.openarchives.org/OAI/2.0/oai_dc/");
|
||||
XmlContentParser<TurtleContentParams> parser = new XmlContentParser<>(in);
|
||||
parser.builder(builder);
|
||||
parser.setHandler(xmlHandler)
|
||||
.parse();
|
||||
parser.setHandler(xmlHandler).parse();
|
||||
assertStream(getClass().getResourceAsStream("oai.ttl"), builder.streamInput());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,12 +15,12 @@ import org.xbib.content.rdf.io.ntriple.NTripleContentParams;
|
|||
import org.xbib.content.rdf.io.turtle.TurtleContentParams;
|
||||
import org.xbib.content.resource.IRI;
|
||||
import org.xbib.content.resource.IRINamespaceContext;
|
||||
import org.xbib.content.resource.text.CharUtils.Profile;
|
||||
import org.xbib.content.resource.url.UrlEncoding;
|
||||
import org.xbib.helper.StreamTester;
|
||||
import org.xbib.net.PercentEncoders;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
|
@ -61,8 +61,8 @@ public class XmlReaderTest extends StreamTester {
|
|||
if ("identifier".equals(name.getLocalPart()) && DefaultResource.isBlank(getResource())) {
|
||||
try {
|
||||
// make sure we can build an opaque IRI, whatever is out there
|
||||
String s = UrlEncoding.encode(value, Profile.SCHEMESPECIFICPART.filter());
|
||||
getResource().setId(IRI.create("id:" + s));
|
||||
getResource().setId(IRI.create("id:" +
|
||||
PercentEncoders.getRegNameEncoder(StandardCharsets.UTF_8).encode(value)));
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
}
|
||||
|
@ -210,7 +210,7 @@ public class XmlReaderTest extends StreamTester {
|
|||
|
||||
@Override
|
||||
public MyBuilder receive(Resource resource) throws IOException {
|
||||
resource.triples().forEach(triples::add);
|
||||
triples.addAll(resource.triples());
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
@prefix dc: <http://purl.org/dc/elements/1.1/> .
|
||||
@prefix oaidc: <http://www.openarchives.org/OAI/2.0/oai_dc/> .
|
||||
|
||||
<id:http://oro.open.ac.uk/25656/5/butler%2Decoop11.pdf> dc:title "Improving the tokenisation of identifier names";
|
||||
<id:http%3A%2F%2Foro.open.ac.uk%2F25656%2F5%2Fbutler%252Decoop11.pdf> dc:title "Improving the tokenisation of identifier names";
|
||||
dc:creator "Butler, Simon", "Wermelinger, Michel", "Yu, Yijun", "Sharp, Helen";
|
||||
dc:description """Identifier names are the main vehicle for semantic information during program comprehension. For tool-supported program comprehension tasks, including concept location and requirements traceability, identifier names need to be tokenised into their semantic constituents. In this paper we present an approach to the automated tokenisation of identifier names that improves on existing techniques in two ways. First, it improves the tokenisation accuracy for single-case identifier names and for identifier names containing digits, which existing techniques largely ignore. Second, performance gains over existing techniques are achieved using smaller oracles, making the approach easier to deploy.
|
||||
|
||||
dc:description """Identifier names are the main vehicle for semantic information during program comprehension. For tool-supported program comprehension tasks, including concept location and requirements traceability, identifier names need to be tokenised into their semantic constituents. In this paper we present an approach to the automated tokenisation of identifier names that improves on existing techniques in two ways. First, it improves the tokenisation accuracy for single-case identifier names and for identifier names containing digits, which existing techniques largely ignore. Second, performance gains over existing techniques are achieved using smaller oracles, making the approach easier to deploy.
|
||||
|
||||
Accuracy was evaluated by comparing our algorithm to manual tokenizations of 28,000 identifier names drawn from 60 well-known open source Java projects totalling 16.5 MSLOC. Moreover, the projects were used to perform a study of identifier tokenisation features (single case, camel case, use of digits, etc.) per object-oriented construct (class names, method names, local variable names, etc.), thus providing an insight into naming conventions in industrial-scale object-oriented code. Our tokenisation tool and datasets are publicly available.""";
|
||||
dc:publisher "Springer Verlag";
|
||||
dc:contributor "Mira, Mezini";
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
@prefix oai: <http://www.openarchives.org/OAI/2.0/oai_dc/> .
|
||||
|
||||
<id:oai:doaj-articles:8f128127aa68db508da01e428930bbcc> oai:OAI-PMH [
|
||||
<id:oai%3Adoaj-articles%3A8f128127aa68db508da01e428930bbcc> oai:OAI-PMH [
|
||||
oai:responseDate "2013-06-29T18:31:40Z";
|
||||
oai:request "http://www.doaj.org/oai.article";
|
||||
oai:ListRecords [
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
|
||||
tasks.withType(JavaCompile) {
|
||||
options.compilerArgs << "-Xlint:all" << "-profile" << "compact2"
|
||||
}
|
||||
|
||||
dependencies {
|
||||
compile "org.xbib:net:${project.property('xbib-net.version')}"
|
||||
testCompile "com.fasterxml.jackson.core:jackson-databind:${project.property('jackson.version')}"
|
||||
}
|
|
@ -1,18 +1,19 @@
|
|||
package org.xbib.content.resource;
|
||||
|
||||
import org.xbib.content.resource.scheme.HttpScheme;
|
||||
import org.xbib.content.resource.scheme.Scheme;
|
||||
import org.xbib.content.resource.scheme.SchemeRegistry;
|
||||
import org.xbib.content.resource.text.CharUtils;
|
||||
import org.xbib.content.resource.text.CharUtils.Profile;
|
||||
import org.xbib.content.resource.text.InvalidCharacterException;
|
||||
import org.xbib.content.resource.url.UrlEncoding;
|
||||
import org.xbib.net.PercentDecoder;
|
||||
import org.xbib.net.PercentEncoders;
|
||||
import org.xbib.net.scheme.Scheme;
|
||||
import org.xbib.net.scheme.SchemeRegistry;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.IDN;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -177,25 +178,6 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
return new IRI(schemeClass, scheme, authority, userinfo, host, port, path, query, fragment);
|
||||
}
|
||||
|
||||
public static IRI normalize(IRI iri) {
|
||||
if (iri.isOpaque() || iri.getPath() == null) {
|
||||
return iri;
|
||||
}
|
||||
IRI normalized = null;
|
||||
if (iri.schemeClass != null) {
|
||||
normalized = iri.schemeClass.normalize(iri);
|
||||
}
|
||||
try {
|
||||
return normalized != null ? normalized : new IRI(iri.schemeClass, iri.getScheme(), iri.getAuthority(), iri
|
||||
.getUserInfo(), iri.getHost(), iri.getPort(), normalize(iri.getPath()), UrlEncoding.encode(UrlEncoding
|
||||
.decode(iri.getQuery()), Profile.IQUERY.filter()), UrlEncoding
|
||||
.encode(UrlEncoding.decode(iri.getFragment()), Profile.IFRAGMENT.filter()));
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static String normalize(String path) {
|
||||
if (path == null || path.length() == 0) {
|
||||
return "/";
|
||||
|
@ -211,13 +193,14 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
segments[n] = null;
|
||||
}
|
||||
}
|
||||
PercentDecoder percentDecoder = new PercentDecoder();
|
||||
for (String segment : segments) {
|
||||
if (segment != null) {
|
||||
if (buf.length() > 1) {
|
||||
buf.append('/');
|
||||
}
|
||||
try {
|
||||
buf.append(UrlEncoding.encode(UrlEncoding.decode(segment), Profile.IPATHNODELIMS_SEG.filter()));
|
||||
buf.append(PercentEncoders.getMatrixEncoder(StandardCharsets.UTF_8).encode(percentDecoder.decode(segment)));
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
}
|
||||
|
@ -440,13 +423,13 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
if (authority != null && asciiAuthority == null) {
|
||||
asciiAuthority = buildASCIIAuthority();
|
||||
}
|
||||
return (asciiAuthority != null && asciiAuthority.length() > 0) ? asciiAuthority : null;
|
||||
return asciiAuthority != null && asciiAuthority.length() > 0 ? asciiAuthority : null;
|
||||
}
|
||||
|
||||
public String getASCIIFragment() {
|
||||
if (fragment != null && asciiFragment == null) {
|
||||
try {
|
||||
asciiFragment = UrlEncoding.encode(fragment, Profile.FRAGMENT.filter());
|
||||
asciiFragment = PercentEncoders.getFragmentEncoder(StandardCharsets.UTF_8).encode(fragment);
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
}
|
||||
|
@ -457,7 +440,7 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
public String getASCIIPath() {
|
||||
if (path != null && asciiPath == null) {
|
||||
try {
|
||||
asciiPath = UrlEncoding.encode(path, Profile.PATH.filter());
|
||||
asciiPath = PercentEncoders.getPathEncoder(StandardCharsets.UTF_8).encode(path);
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
}
|
||||
|
@ -468,7 +451,7 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
public String getASCIIQuery() {
|
||||
if (query != null && asciiQuery == null) {
|
||||
try {
|
||||
asciiQuery = UrlEncoding.encode(query, Profile.QUERY.filter(), Profile.PATH.filter());
|
||||
asciiQuery = PercentEncoders.getQueryEncoder(StandardCharsets.UTF_8).encode(query);
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
}
|
||||
|
@ -479,7 +462,7 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
public String getASCIIUserInfo() {
|
||||
if (userinfo != null && asciiUserinfo == null) {
|
||||
try {
|
||||
asciiUserinfo = UrlEncoding.encode(userinfo, Profile.USERINFO.filter());
|
||||
asciiUserinfo = PercentEncoders.getUnreservedEncoder(StandardCharsets.UTF_8).encode(userinfo);
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
}
|
||||
|
@ -497,18 +480,9 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
}
|
||||
|
||||
private String buildASCIIAuthority() {
|
||||
if (schemeClass instanceof HttpScheme) {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buildAuthority(buf, getASCIIUserInfo(), getASCIIHost(), getPort());
|
||||
return buf.toString();
|
||||
} else {
|
||||
try {
|
||||
return UrlEncoding.encode(authority, Profile.AUTHORITY.filter());
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buildAuthority(buf, getASCIIUserInfo(), getASCIIHost(), getPort());
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
public boolean isAbsolute() {
|
||||
|
@ -538,10 +512,6 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
return resolve(this, new IRI(iri));
|
||||
}
|
||||
|
||||
public IRI normalize() {
|
||||
return normalize(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
|
@ -555,7 +525,7 @@ public class IRI implements Comparable<IRI>, Node {
|
|||
|
||||
public String toEncodedString() {
|
||||
try {
|
||||
return UrlEncoding.encode(toString(), Profile.SCHEMESPECIFICPART.filter());
|
||||
return PercentEncoders.getUnreservedEncoder(StandardCharsets.UTF_8).encode(toString());
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
return null;
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
package org.xbib.content.resource.scheme;
|
||||
|
||||
import org.xbib.content.resource.IRI;
|
||||
|
||||
/**
|
||||
* Base implementation for IRI scheme providers.
|
||||
*/
|
||||
public abstract class AbstractScheme implements Scheme {
|
||||
|
||||
protected final String name;
|
||||
protected final int port;
|
||||
|
||||
protected AbstractScheme(String name, int port) {
|
||||
this.name = name;
|
||||
this.port = port;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDefaultPort() {
|
||||
return port;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default return unmodified.
|
||||
*/
|
||||
@Override
|
||||
public IRI normalize(IRI iri) {
|
||||
return iri;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default return unmodified.
|
||||
*/
|
||||
@Override
|
||||
public String normalizePath(String path) {
|
||||
return path;
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
package org.xbib.content.resource.scheme;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class DefaultScheme extends AbstractScheme {
|
||||
|
||||
public DefaultScheme(String name) {
|
||||
super(name, -1);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,16 +0,0 @@
|
|||
package org.xbib.content.resource.scheme;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class FtpScheme extends HttpScheme {
|
||||
|
||||
static final String FTP_SCHEME_NAME = "ftp";
|
||||
|
||||
private static final int DEFAULT_PORT = 21;
|
||||
|
||||
public FtpScheme() {
|
||||
super(FTP_SCHEME_NAME, DEFAULT_PORT);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
package org.xbib.content.resource.scheme;
|
||||
|
||||
import org.xbib.content.resource.IRI;
|
||||
import org.xbib.content.resource.text.CharUtils.Profile;
|
||||
import org.xbib.content.resource.url.UrlEncoding;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class HttpScheme extends AbstractScheme {
|
||||
|
||||
static final String HTTP_SCHEME_NAME = "http";
|
||||
private static final Logger logger = Logger.getLogger(HttpScheme.class.getName());
|
||||
private static final int DEFAULT_PORT = 80;
|
||||
|
||||
HttpScheme() {
|
||||
super(HTTP_SCHEME_NAME, DEFAULT_PORT);
|
||||
}
|
||||
|
||||
HttpScheme(String name, int port) {
|
||||
super(name, port);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IRI normalize(IRI iri) {
|
||||
int port = (iri.getPort() == getDefaultPort()) ? -1 : iri.getPort();
|
||||
String host = iri.getHost();
|
||||
if (host != null) {
|
||||
host = host.toLowerCase();
|
||||
}
|
||||
try {
|
||||
return IRI.builder()
|
||||
.scheme(iri.getScheme())
|
||||
.userinfo(iri.getUserInfo())
|
||||
.host(host)
|
||||
.port(port)
|
||||
.path(iri.getPath())
|
||||
.query(UrlEncoding.encode(UrlEncoding.decode(iri.getQuery()), Profile.IQUERY.filter()))
|
||||
.fragment(UrlEncoding.encode(UrlEncoding.decode(iri.getFragment()), Profile.IFRAGMENT.filter()))
|
||||
.build();
|
||||
} catch (IOException e) {
|
||||
logger.log(Level.FINE, e.getMessage(), e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String normalizePath(String path) {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
package org.xbib.content.resource.scheme;
|
||||
|
||||
class HttpsScheme extends HttpScheme {
|
||||
|
||||
static final String HTTPS_SCHEME_NAME = "https";
|
||||
private static final int DEFAULT_PORT = 443;
|
||||
|
||||
public HttpsScheme() {
|
||||
super(HTTPS_SCHEME_NAME, DEFAULT_PORT);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
package org.xbib.content.resource.scheme;
|
||||
|
||||
import org.xbib.content.resource.IRI;
|
||||
|
||||
/**
|
||||
* Interface implemented by custom IRI scheme parsers.
|
||||
*/
|
||||
public interface Scheme {
|
||||
|
||||
String getName();
|
||||
|
||||
IRI normalize(IRI iri);
|
||||
|
||||
String normalizePath(String path);
|
||||
|
||||
int getDefaultPort();
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
package org.xbib.content.resource.scheme;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Static registry of custom IRI schemes.
|
||||
*/
|
||||
public final class SchemeRegistry {
|
||||
|
||||
private static SchemeRegistry registry;
|
||||
private final Map<String, Scheme> schemes;
|
||||
|
||||
SchemeRegistry() {
|
||||
schemes = new HashMap<>();
|
||||
schemes.put(HttpScheme.HTTP_SCHEME_NAME, new HttpScheme());
|
||||
schemes.put(HttpsScheme.HTTPS_SCHEME_NAME, new HttpsScheme());
|
||||
schemes.put(FtpScheme.FTP_SCHEME_NAME, new FtpScheme());
|
||||
}
|
||||
|
||||
public static SchemeRegistry getInstance() {
|
||||
if (registry == null) {
|
||||
registry = new SchemeRegistry();
|
||||
}
|
||||
return registry;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public boolean register(String schemeClass) throws ClassNotFoundException, IllegalAccessException,
|
||||
InstantiationException {
|
||||
Class<Scheme> klass = (Class<Scheme>) Thread.currentThread().getContextClassLoader().loadClass(schemeClass);
|
||||
return register(klass);
|
||||
}
|
||||
|
||||
public boolean register(Class<Scheme> schemeClass) throws IllegalAccessException,
|
||||
InstantiationException {
|
||||
Scheme scheme = schemeClass.newInstance();
|
||||
return register(scheme);
|
||||
}
|
||||
|
||||
public boolean register(Scheme scheme) {
|
||||
String name = scheme.getName();
|
||||
if (schemes.get(name) == null) {
|
||||
schemes.put(name.toLowerCase(), scheme);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public Scheme getScheme(String scheme) {
|
||||
if (scheme == null) {
|
||||
return null;
|
||||
}
|
||||
Scheme s = schemes.get(scheme.toLowerCase());
|
||||
return (s != null) ? s : new DefaultScheme(scheme);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
/**
|
||||
* Classes for resource schemes.
|
||||
*/
|
||||
package org.xbib.content.resource.scheme;
|
|
@ -1,195 +0,0 @@
|
|||
package org.xbib.content.resource.url;
|
||||
|
||||
import static java.nio.charset.CoderResult.OVERFLOW;
|
||||
import static java.nio.charset.CoderResult.UNDERFLOW;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.MalformedInputException;
|
||||
import java.nio.charset.UnmappableCharacterException;
|
||||
|
||||
/**
|
||||
* Decodes percent-encoded (%XX) Unicode text.
|
||||
*/
|
||||
public final class PercentDecoder {

    /**
     * Lower bound for the decoded-char buffer. A single supplementary code point decodes to
     * two chars (a surrogate pair); with a smaller buffer the decoder can keep reporting
     * overflow without consuming any input, which would make the decode loop spin forever.
     */
    private static final int MIN_DECODED_CHAR_BUF_SIZE = 2;

    /**
     * Written to with decoded chars by decoder.
     */
    private final CharBuffer decodedCharBuf;

    private final CharsetDecoder decoder;

    /**
     * The decoded string for the current input.
     */
    private final StringBuilder outputBuf = new StringBuilder();

    /**
     * Bytes represented by the current sequence of %-triples. Resized as needed.
     */
    private ByteBuffer encodedBuf;

    /**
     * Construct a new PercentDecoder with default buffer sizes.
     *
     * @param charsetDecoder Charset to decode bytes into chars with
     * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int)
     */
    public PercentDecoder(CharsetDecoder charsetDecoder) {
        this(charsetDecoder, 16, 16);
    }

    /**
     * @param charsetDecoder            Charset to decode bytes into chars with
     * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes; the buffer
     *                                  grows on demand, so 0 is allowed
     * @param decodedCharBufSize        Size of buffer that encoded bytes are decoded into; values
     *                                  below 2 are raised to 2
     * @throws IllegalArgumentException if either size is negative
     */
    public PercentDecoder(CharsetDecoder charsetDecoder, int initialEncodedByteBufSize,
                          int decodedCharBufSize) {
        // ByteBuffer.allocate rejects a negative size with IllegalArgumentException
        encodedBuf = ByteBuffer.allocate(initialEncodedByteBufSize);
        if (decodedCharBufSize < 0) {
            throw new IllegalArgumentException("Negative decoded char buffer size: " + decodedCharBufSize);
        }
        decodedCharBuf = CharBuffer.allocate(Math.max(MIN_DECODED_CHAR_BUF_SIZE, decodedCharBufSize));
        decoder = charsetDecoder;
    }

    /**
     * @param input Input with %-encoded representation of characters in this instance's configured character set, e.g.
     *              "%20" for a space character
     * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters
     * @throws MalformedInputException      if decoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is
     *                                      detected
     * @throws IllegalArgumentException     if a '%' is not followed by two hex digits
     */
    public String decode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        outputBuf.setLength(0);
        // this is almost always an underestimate of the size needed:
        // only a 4-byte encoding (which is 12 characters input) would cause this to be an overestimate
        outputBuf.ensureCapacity(input.length() / 8);
        encodedBuf.clear();

        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c != '%') {
                // a literal char ends the current run of %-triples: decode what was collected so far
                handleEncodedBytes();

                outputBuf.append(c);
                continue;
            }

            if (i + 2 >= input.length()) {
                throw new IllegalArgumentException(
                        "Could not percent decode <" + input + ">: incomplete %-pair at position " + i);
            }

            // grow the byte buf if needed
            if (encodedBuf.remaining() == 0) {
                // max(1, ...) so that a buffer created with capacity 0 can still grow
                ByteBuffer largerBuf = ByteBuffer.allocate(Math.max(1, encodedBuf.capacity() * 2));
                encodedBuf.flip();
                largerBuf.put(encodedBuf);
                encodedBuf = largerBuf;
            }

            // note that we advance i here as we consume chars
            int msBits = Character.digit(input.charAt(++i), 16);
            int lsBits = Character.digit(input.charAt(++i), 16);

            if (msBits == -1 || lsBits == -1) {
                throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">");
            }

            msBits <<= 4;
            msBits |= lsBits;

            // msBits can only have 8 bits set, so cast is safe
            encodedBuf.put((byte) msBits);
        }

        handleEncodedBytes();

        return outputBuf.toString();
    }

    /**
     * Decode any buffered encoded bytes and write them to the output buf.
     */
    private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException {
        if (encodedBuf.position() == 0) {
            // nothing to do
            return;
        }

        decoder.reset();
        CoderResult coderResult;

        // switch to reading mode
        encodedBuf.flip();

        // loop while we're filling up the decoded char buf, or there's any encoded bytes
        // decode() in practice seems to only consume bytes when it can decode an entire char...
        do {
            decodedCharBuf.clear();
            coderResult = decoder.decode(encodedBuf, decodedCharBuf, false);
            throwIfError(coderResult);
            appendDecodedChars();
        } while (coderResult == OVERFLOW && encodedBuf.hasRemaining());

        // final decode with end-of-input flag
        decodedCharBuf.clear();
        coderResult = decoder.decode(encodedBuf, decodedCharBuf, true);
        throwIfError(coderResult);

        if (encodedBuf.hasRemaining()) {
            throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes");
        }
        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult);
        }

        appendDecodedChars();

        // we've finished the input, wrap it up
        encodedBuf.clear();
        flush();
    }

    /**
     * Must only be called when the input encoded bytes buffer is empty.
     */
    private void flush() throws MalformedInputException, UnmappableCharacterException {
        CoderResult coderResult;
        decodedCharBuf.clear();

        coderResult = decoder.flush(decodedCharBuf);
        appendDecodedChars();

        throwIfError(coderResult);

        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Decoder flush resulted in " + coderResult);
        }
    }

    /**
     * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding
     * CharacterCodingException.
     *
     * @param coderResult result to check
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException {
        if (coderResult.isMalformed()) {
            throw new MalformedInputException(coderResult.length());
        }
        if (coderResult.isUnmappable()) {
            throw new UnmappableCharacterException(coderResult.length());
        }
    }

    /**
     * Flip the decoded char buf and append it to the string buf.
     */
    private void appendDecodedChars() {
        decodedCharBuf.flip();
        outputBuf.append(decodedCharBuf);
    }
}
|
|
@ -1,187 +0,0 @@
|
|||
package org.xbib.content.resource.url;
|
||||
|
||||
import static java.lang.Character.isHighSurrogate;
|
||||
import static java.lang.Character.isLowSurrogate;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.MalformedInputException;
|
||||
import java.nio.charset.UnmappableCharacterException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Encodes unsafe characters as a sequence of %XX hex-encoded bytes.
|
||||
*
|
||||
* This is typically done when encoding components of URLs. See {@link UrlPercentEncoders} for pre-configured
|
||||
* PercentEncoder instances.
|
||||
*/
|
||||
public final class PercentEncoder {

    /** Upper-case hex digits, indexed by nibble value. */
    private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray();

    /** Chars that are copied to the output unchanged. */
    private final BitSet safeChars;

    /** Turns each unsafe char (or surrogate pair) into the bytes that get percent-encoded. */
    private final CharsetEncoder encoder;

    /**
     * Reused handler so that the common string-returning call does not allocate a new one each time.
     */
    private final StringBuilderPercentEncoderOutputHandler stringHandler = new StringBuilderPercentEncoderOutputHandler();

    /** Scratch buffer receiving the encoder's bytes for one unsafe char or surrogate pair. */
    private final ByteBuffer encodedBytes;

    /** Scratch buffer holding the one or two chars currently being encoded. */
    private final CharBuffer unsafeCharsToEncode;

    /**
     * @param safeChars      the set of chars to NOT encode, stored as a bitset with the int positions corresponding to
     *                       those chars set to true. Treated as read only.
     * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder
     *                       instances across threads.
     */
    public PercentEncoder(BitSet safeChars, CharsetEncoder charsetEncoder) {
        this.safeChars = safeChars;
        this.encoder = charsetEncoder;

        // maxBytesPerChar() is a float; truncate and add one to stay on the safe side
        final int bytesPerChar = 1 + (int) encoder.maxBytesPerChar();
        // a surrogate pair is two chars, so reserve room for two chars' worth of bytes
        this.encodedBytes = ByteBuffer.allocate(bytesPerChar * 2);
        this.unsafeCharsToEncode = CharBuffer.allocate(2);
    }

    /**
     * Translate a coder result into the matching exception, if it denotes a failure.
     *
     * @param result result to check
     * @throws IllegalStateException        if result is overflow
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private static void checkResult(CoderResult result) throws MalformedInputException, UnmappableCharacterException {
        if (result.isOverflow()) {
            throw new IllegalStateException("Byte buffer overflow; this should not happen.");
        }
        if (result.isMalformed()) {
            throw new MalformedInputException(result.length());
        }
        if (result.isUnmappable()) {
            throw new UnmappableCharacterException(result.length());
        }
    }

    /**
     * Encode the input, handing every output char to the given handler.
     *
     * @param input   input string
     * @param handler handler to call on each output character
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     * @throws IllegalArgumentException     if a high surrogate is not followed by a low surrogate
     */
    public void encode(CharSequence input, StringBuilderPercentEncoderOutputHandler handler) throws
            MalformedInputException, UnmappableCharacterException {
        int pos = 0;
        while (pos < input.length()) {
            final char current = input.charAt(pos);

            if (safeChars.get(current)) {
                handler.onOutputChar(current);
                pos++;
                continue;
            }

            // unsafe: collect the char (plus its low surrogate, if any) and percent-encode it
            unsafeCharsToEncode.clear();
            unsafeCharsToEncode.append(current);

            if (isHighSurrogate(current)) {
                final int nextPos = pos + 1;
                if (nextPos >= input.length()) {
                    throw new IllegalArgumentException(
                            "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer
                                    .toHexString(current) + ")");
                }
                final char following = input.charAt(nextPos);
                if (!isLowSurrogate(following)) {
                    throw new IllegalArgumentException(
                            "Invalid UTF-16: Char " + (pos) + " is a high surrogate (\\u" + Integer
                                    .toHexString(current) + "), but char " + (nextPos) + " is not a low surrogate (\\u" + Integer
                                    .toHexString(following) + ")");
                }
                unsafeCharsToEncode.append(following);
                // the low surrogate has been consumed as well
                pos = nextPos;
            }

            flushUnsafeCharBuffer(handler);
            pos++;
        }
    }

    /**
     * Encode the input and return the result as a String.
     *
     * @param input input string
     * @return the input string with every character that's not in safeChars turned into its byte representation via the
     * instance's encoder and then percent-encoded
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String encode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        final StringBuilderPercentEncoderOutputHandler sink = stringHandler;
        sink.reset();
        sink.ensureCapacity(input.length());
        encode(input, sink);
        return sink.getContents();
    }

    /**
     * Run the buffered unsafe chars through the charset encoder and emit each resulting byte as a %XX triple.
     *
     * Side effects: unsafeCharsToEncode is read, encodedBytes is cleared and overwritten.
     */
    private void flushUnsafeCharBuffer(StringBuilderPercentEncoderOutputHandler handler) throws MalformedInputException,
            UnmappableCharacterException {
        // the char buffer was just written to; switch it to reading
        unsafeCharsToEncode.flip();
        encodedBytes.clear();

        encoder.reset();
        checkResult(encoder.encode(unsafeCharsToEncode, encodedBytes, true));
        checkResult(encoder.flush(encodedBytes));

        // switch the byte buffer to reading and emit its contents
        encodedBytes.flip();
        while (encodedBytes.hasRemaining()) {
            final byte octet = encodedBytes.get();
            final char high = HEX_CODE[(octet & 0xF0) >> 4];
            final char low = HEX_CODE[octet & 0x0F];
            handler.onOutputChar('%');
            handler.onOutputChar(high);
            handler.onOutputChar(low);
        }
    }

    /**
     * Output handler that collects the emitted chars in a StringBuilder.
     */
    private static class StringBuilderPercentEncoderOutputHandler {

        private final StringBuilder stringBuilder;

        StringBuilderPercentEncoderOutputHandler() {
            stringBuilder = new StringBuilder();
        }

        /** @return everything collected since the last reset */
        String getContents() {
            return stringBuilder.toString();
        }

        /** Discard the collected chars, keeping the allocated capacity. */
        void reset() {
            stringBuilder.setLength(0);
        }

        /** Make sure at least {@code length} chars fit without reallocation. */
        void ensureCapacity(int length) {
            stringBuilder.ensureCapacity(length);
        }

        /** Append one output char. */
        void onOutputChar(char c) {
            stringBuilder.append(c);
        }
    }
}
|
|
@ -1,472 +0,0 @@
|
|||
package org.xbib.content.resource.url;
|
||||
|
||||
import static org.xbib.content.resource.url.UrlPercentEncoders.getFragmentEncoder;
|
||||
import static org.xbib.content.resource.url.UrlPercentEncoders.getMatrixEncoder;
|
||||
import static org.xbib.content.resource.url.UrlPercentEncoders.getPathEncoder;
|
||||
import static org.xbib.content.resource.url.UrlPercentEncoders.getQueryParamEncoder;
|
||||
import static org.xbib.content.resource.url.UrlPercentEncoders.getRegNameEncoder;
|
||||
import static org.xbib.content.resource.url.UrlPercentEncoders.getUnstructuredQueryEncoder;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Builder for urls with url-encoding applied to path, query param, etc.
|
||||
*
|
||||
* Escaping rules are from RFC 3986, RFC 1738 and the HTML 4 spec (http://www.w3.org/TR/html401/interact/forms.html#form-content-type).
|
||||
* This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make
|
||||
* HTTP-useful URLs.
|
||||
*/
|
||||
public final class UrlBuilder {
|
||||
|
||||
/**
|
||||
* IPv6 address, taken from <a href="http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings">Stack Overflow</a>.
|
||||
*/
|
||||
private static final Pattern IPV6_PATTERN = Pattern.compile(
|
||||
"\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z");
|
||||
|
||||
/**
|
||||
* IPv4 dotted quad.
|
||||
*/
|
||||
private static final Pattern IPV4_PATTERN = Pattern
|
||||
.compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z");
|
||||
|
||||
private final String scheme;
|
||||
|
||||
private final String host;
|
||||
|
||||
private final Integer port;
|
||||
|
||||
private final List<Pair<String, String>> queryParams = new ArrayList<>();
|
||||
private final List<PathSegment> pathSegments = new ArrayList<>();
|
||||
private final PercentEncoder pathEncoder = getPathEncoder();
|
||||
private final PercentEncoder regNameEncoder = getRegNameEncoder();
|
||||
private final PercentEncoder matrixEncoder = getMatrixEncoder();
|
||||
private final PercentEncoder queryParamEncoder = getQueryParamEncoder();
|
||||
private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder();
|
||||
private final PercentEncoder fragmentEncoder = getFragmentEncoder();
|
||||
/**
|
||||
* If this is non-null, queryParams must be empty, and vice versa.
|
||||
*/
|
||||
private String unstructuredQuery;
|
||||
private String fragment;
|
||||
|
||||
private boolean forceTrailingSlash = false;
|
||||
|
||||
/**
|
||||
* Create a URL with UTF-8 encoding.
|
||||
*
|
||||
* @param scheme scheme (e.g. http)
|
||||
* @param host host (e.g. foo.com or 1.2.3.4 or [::1])
|
||||
* @param port null or a positive integer
|
||||
*/
|
||||
private UrlBuilder(String scheme, String host, Integer port) {
|
||||
this.host = host;
|
||||
this.scheme = scheme;
|
||||
this.port = port;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a URL with an null port and UTF-8 encoding.
|
||||
*
|
||||
* @param scheme scheme (e.g. http)
|
||||
* @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
|
||||
* ([::1]), excluding IPvFuture since no one uses that in practice
|
||||
* @return a url builder
|
||||
* @see UrlBuilder#forHost(String scheme, String host, int port)
|
||||
*/
|
||||
public static UrlBuilder forHost(String scheme, String host) {
|
||||
return new UrlBuilder(scheme, host, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param scheme scheme (e.g. http)
|
||||
* @param host host in any of the valid syntaxes: reg-name ( a dns name), ipv4 literal (1.2.3.4), ipv6 literal
|
||||
* ([::1]), excluding IPvFuture since no one uses that in practice
|
||||
* @param port port
|
||||
* @return a url builder
|
||||
*/
|
||||
public static UrlBuilder forHost(String scheme, String host, int port) {
|
||||
return new UrlBuilder(scheme, host, port);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the
|
||||
* query string apply.
|
||||
*
|
||||
* @param url url to initialize builder with
|
||||
* @return a UrlBuilder containing the host, path, etc. from the url
|
||||
* @throws CharacterCodingException if char decoding fails
|
||||
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
|
||||
*/
|
||||
public static UrlBuilder fromUrl(URL url) throws CharacterCodingException {
|
||||
return fromUrl(url, StandardCharsets.UTF_8.newDecoder());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a UrlBuilder initialized with the contents of a {@link URL}.
|
||||
*
|
||||
* The query string will be parsed into HTML4 query params if it can be separated into a
|
||||
* <code>&</code>-separated sequence of <code>key=value</code> pairs. The sequence of query params can then be
|
||||
* appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is
|
||||
* only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that
|
||||
* is in some other form. To represent this case, if the aforementioned param-parsing attempt fails, the query
|
||||
* string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link
|
||||
* UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls
|
||||
* to {@link UrlBuilder#unstructuredQuery(String)}}, which replaces the entire query string, are allowed.
|
||||
*
|
||||
* @param url url to initialize builder with
|
||||
* @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8)
|
||||
* @return a UrlBuilder containing the host, path, etc. from the url
|
||||
* @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to
|
||||
* report errors
|
||||
* @see UrlBuilder#fromUrl(URL, CharsetDecoder)
|
||||
*/
|
||||
public static UrlBuilder fromUrl(URL url, CharsetDecoder charsetDecoder) throws
|
||||
CharacterCodingException {
|
||||
|
||||
PercentDecoder decoder = new PercentDecoder(charsetDecoder);
|
||||
// reg names must be encoded UTF-8
|
||||
PercentDecoder regNameDecoder;
|
||||
if (charsetDecoder.charset().equals(StandardCharsets.UTF_8)) {
|
||||
regNameDecoder = decoder;
|
||||
} else {
|
||||
regNameDecoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder());
|
||||
}
|
||||
|
||||
Integer port = url.getPort();
|
||||
if (port == -1) {
|
||||
port = null;
|
||||
}
|
||||
|
||||
UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port);
|
||||
|
||||
buildFromPath(builder, decoder, url);
|
||||
|
||||
buildFromQuery(builder, decoder, url);
|
||||
|
||||
if (url.getRef() != null) {
|
||||
builder.fragment(decoder.decode(url.getRef()));
|
||||
}
|
||||
|
||||
return builder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Populate a url builder based on the query of an URL.
|
||||
*
|
||||
* @param builder builder
|
||||
* @param decoder decoder
|
||||
* @param url url
|
||||
* @throws CharacterCodingException if build fails
|
||||
*/
|
||||
private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws
|
||||
CharacterCodingException {
|
||||
if (url.getQuery() != null) {
|
||||
String q = url.getQuery();
|
||||
List<Pair<String, String>> pairs = new ArrayList<>();
|
||||
boolean parseOk = true;
|
||||
for (String queryChunk : q.split("&")) {
|
||||
String[] queryParamChunks = queryChunk.split("=");
|
||||
if (queryParamChunks.length != 2) {
|
||||
parseOk = false;
|
||||
break;
|
||||
}
|
||||
pairs.add(Pair.of(decoder.decode(queryParamChunks[0]),
|
||||
decoder.decode(queryParamChunks[1])));
|
||||
}
|
||||
if (parseOk) {
|
||||
for (Pair<String, String> pair : pairs) {
|
||||
builder.queryParam(pair.getKey(), pair.getValue());
|
||||
}
|
||||
} else {
|
||||
builder.unstructuredQuery(decoder.decode(q));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Populate the path segments of a url builder from an URL.
|
||||
*
|
||||
* @param builder builder
|
||||
* @param decoder decoder
|
||||
* @param url url
|
||||
* @throws CharacterCodingException if build fails
|
||||
*/
|
||||
private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws
|
||||
CharacterCodingException {
|
||||
for (String pathChunk : url.getPath().split("/")) {
|
||||
if ("".equals(pathChunk)) {
|
||||
continue;
|
||||
}
|
||||
if (pathChunk.charAt(0) == ';') {
|
||||
builder.pathSegment("");
|
||||
for (String matrixChunk : pathChunk.substring(1).split(";")) {
|
||||
buildFromMatrixParamChunk(decoder, builder, matrixChunk);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
String[] matrixChunks = pathChunk.split(";");
|
||||
builder.pathSegment(decoder.decode(matrixChunks[0]));
|
||||
for (int i = 1; i < matrixChunks.length; i++) {
|
||||
buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws
|
||||
CharacterCodingException {
|
||||
String[] mtxPair = pathMatrixChunk.split("=");
|
||||
if (mtxPair.length != 2) {
|
||||
throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">");
|
||||
}
|
||||
|
||||
String mtxName = mtxPair[0];
|
||||
String mtxVal = mtxPair[1];
|
||||
ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal));
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a path segment.
|
||||
*
|
||||
* @param segment a path segment
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder pathSegment(String segment) {
|
||||
pathSegments.add(new PathSegment(segment));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}.
|
||||
*
|
||||
* @param segments path segments
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder pathSegments(String... segments) {
|
||||
for (String segment : segments) {
|
||||
pathSegment(segment);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an HTML query parameter. Query parameters will be encoded in the order added.
|
||||
*
|
||||
* Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by
|
||||
* http://www.w3.org/TR/html401/interact/forms.html#form-content-type.
|
||||
*
|
||||
* If you use this method to build a query string, or created this builder from a url with a query string that can
|
||||
* successfully be parsed into query param pairs, you cannot subsequently use {@link
|
||||
* UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}.
|
||||
*
|
||||
* @param name param name
|
||||
* @param value param value
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder queryParam(String name, String value) {
|
||||
if (unstructuredQuery != null) {
|
||||
throw new IllegalStateException(
|
||||
"Cannot call queryParam() when this already has an unstructured query specified");
|
||||
}
|
||||
|
||||
queryParams.add(Pair.of(name, value));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that
|
||||
* is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite
|
||||
* that query.
|
||||
*
|
||||
* If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you
|
||||
* cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL,
|
||||
* CharsetDecoder)}.
|
||||
*
|
||||
* @param query Complete URI query, as specified by https://tools.ietf.org/html/rfc3986#section-3.4
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder unstructuredQuery(String query) {
|
||||
if (!queryParams.isEmpty()) {
|
||||
throw new IllegalStateException(
|
||||
"Cannot call unstructuredQuery() when this already has queryParam pairs specified");
|
||||
}
|
||||
|
||||
unstructuredQuery = query;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the unstructured query and any query params.
|
||||
*
|
||||
* Since the query / query param situation is a little complicated, this method will let you remove all query
|
||||
* information and start again from scratch. This may be useful when taking an existing url, parsing it into a
|
||||
* builder, and then re-doing its query params, for instance.
|
||||
*
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder clearQuery() {
|
||||
queryParams.clear();
|
||||
unstructuredQuery = null;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the
|
||||
* root. Matrix params will be encoded in the order added.
|
||||
*
|
||||
* @param name param name
|
||||
* @param value param value
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder matrixParam(String name, String value) {
|
||||
if (pathSegments.isEmpty()) {
|
||||
// create an empty path segment to represent a matrix param applied to the root
|
||||
pathSegment("");
|
||||
}
|
||||
PathSegment seg = pathSegments.get(pathSegments.size() - 1);
|
||||
seg.matrixParams.add(Pair.of(name, value));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the fragment.
|
||||
*
|
||||
* @param fragment fragment string
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder fragment(String fragment) {
|
||||
this.fragment = fragment;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Force the generated URL to have a trailing slash at the end of the path.
|
||||
*
|
||||
* @return this
|
||||
*/
|
||||
public UrlBuilder forceTrailingSlash() {
|
||||
forceTrailingSlash = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode the current builder state into a URL string.
|
||||
*
|
||||
* @return a well-formed URL string
|
||||
* @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
|
||||
*/
|
||||
public String toUrlString() throws CharacterCodingException {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
|
||||
buf.append(scheme);
|
||||
buf.append("://");
|
||||
|
||||
buf.append(encodeHost(host));
|
||||
if (port != null) {
|
||||
buf.append(':');
|
||||
buf.append(port);
|
||||
}
|
||||
|
||||
for (PathSegment pathSegment : pathSegments) {
|
||||
buf.append('/');
|
||||
buf.append(pathEncoder.encode(pathSegment.segment));
|
||||
|
||||
for (Pair<String, String> matrixParam : pathSegment.matrixParams) {
|
||||
buf.append(';');
|
||||
buf.append(matrixEncoder.encode(matrixParam.getKey()));
|
||||
buf.append('=');
|
||||
buf.append(matrixEncoder.encode(matrixParam.getValue()));
|
||||
}
|
||||
}
|
||||
|
||||
if (forceTrailingSlash) {
|
||||
buf.append('/');
|
||||
}
|
||||
|
||||
if (!queryParams.isEmpty()) {
|
||||
buf.append("?");
|
||||
Iterator<Pair<String, String>> qpIter = queryParams.iterator();
|
||||
while (qpIter.hasNext()) {
|
||||
Pair<String, String> queryParam = qpIter.next();
|
||||
buf.append(queryParamEncoder.encode(queryParam.getKey()));
|
||||
buf.append('=');
|
||||
buf.append(queryParamEncoder.encode(queryParam.getValue()));
|
||||
if (qpIter.hasNext()) {
|
||||
buf.append('&');
|
||||
}
|
||||
}
|
||||
} else if (unstructuredQuery != null) {
|
||||
buf.append("?");
|
||||
buf.append(unstructuredQueryEncoder.encode(unstructuredQuery));
|
||||
}
|
||||
|
||||
if (fragment != null) {
|
||||
buf.append('#');
|
||||
buf.append(fragmentEncoder.encode(fragment));
|
||||
}
|
||||
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param host original host string
|
||||
* @return host encoded as in RFC 3986 section 3.2.2
|
||||
*/
|
||||
private String encodeHost(String host) throws CharacterCodingException {
|
||||
// matching order: IP-literal, IPv4, reg-name
|
||||
if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) {
|
||||
return host;
|
||||
}
|
||||
|
||||
// it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL)
|
||||
return regNameEncoder.encode(host);
|
||||
}
|
||||
|
||||
/**
|
||||
* Bundle of a path segment name and any associated matrix params.
|
||||
*/
|
||||
private static class PathSegment {
|
||||
private final String segment;
|
||||
private final List<Pair<String, String>> matrixParams = new ArrayList<>();
|
||||
|
||||
PathSegment(String segment) {
|
||||
this.segment = segment;
|
||||
}
|
||||
}
|
||||
|
||||
private static class Pair<K, V> {
|
||||
|
||||
K key;
|
||||
|
||||
V value;
|
||||
|
||||
Pair(K key, V value) {
|
||||
this.key = key;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
static <K, V> Pair<K, V> of(K key, V value) {
|
||||
return new Pair<>(key, value);
|
||||
}
|
||||
|
||||
K getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
V getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -1,148 +0,0 @@
|
|||
package org.xbib.content.resource.url;
|
||||
|
||||
import org.xbib.content.resource.text.CharUtils;
|
||||
import org.xbib.content.resource.text.Filter;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.FilterReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* Performs URL Percent Encoding.
|
||||
*/
|
||||
public final class UrlEncoding {
|
||||
|
||||
private static final char[] HEX = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
|
||||
|
||||
private UrlEncoding() {
|
||||
}
|
||||
|
||||
private static void encode(Appendable sb, byte... bytes) throws IOException {
|
||||
encode(sb, 0, bytes.length, bytes);
|
||||
}
|
||||
|
||||
private static void encode(Appendable sb, int offset, int length, byte... bytes) throws IOException {
|
||||
for (int n = offset, i = 0; n < bytes.length && i < length; n++, i++) {
|
||||
byte c = bytes[n];
|
||||
sb.append("%");
|
||||
sb.append(HEX[(c >> 4) & 0x0f]);
|
||||
sb.append(HEX[c & 0x0f]);
|
||||
}
|
||||
}
|
||||
|
||||
public static String encode(CharSequence s, Filter filter) throws IOException {
|
||||
return encode(s, new Filter[]{filter});
|
||||
}
|
||||
|
||||
public static String encode(CharSequence s, Filter... filters) throws IOException {
|
||||
if (s == null) {
|
||||
return null;
|
||||
}
|
||||
return encode(s, "utf-8", filters);
|
||||
}
|
||||
|
||||
private static boolean check(int codepoint, Filter... filters) {
|
||||
for (Filter filter : filters) {
|
||||
if (filter.accept(codepoint)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static String encode(CharSequence s, String enc, Filter... filters) throws IOException {
|
||||
if (s == null) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int n = 0; n < s.length(); n++) {
|
||||
char c = s.charAt(n);
|
||||
if (!CharUtils.isHighSurrogate(c) && check(c, filters)) {
|
||||
encode(sb, String.valueOf(c).getBytes(enc));
|
||||
} else if (CharUtils.isHighSurrogate(c)) {
|
||||
if (check(c, filters)) {
|
||||
String buf = String.valueOf(c) + s.charAt(++n);
|
||||
byte[] b = buf.getBytes(enc);
|
||||
encode(sb, b);
|
||||
} else {
|
||||
sb.append(c);
|
||||
sb.append(s.charAt(++n));
|
||||
}
|
||||
} else {
|
||||
sb.append(c);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String decode(String string) throws IOException {
|
||||
String e = string;
|
||||
char[] buf = new char[e.length()];
|
||||
try (DecodingReader r = new DecodingReader(new StringReader(e))) {
|
||||
int l = r.read(buf);
|
||||
e = new String(buf, 0, l);
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static class DecodingReader extends FilterReader {
|
||||
|
||||
DecodingReader(Reader in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
int c = super.read();
|
||||
if (c == '%') {
|
||||
int c1 = super.read();
|
||||
int c2 = super.read();
|
||||
return decode((char) c1, (char) c2);
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(char[] b, int off, int len) throws IOException {
|
||||
int n = off;
|
||||
int i;
|
||||
while ((i = read()) != -1 && n < off + len) {
|
||||
b[n++] = (char) i;
|
||||
}
|
||||
return n - off;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(char[] b) throws IOException {
|
||||
return read(b, 0, b.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long skip(long n) throws IOException {
|
||||
long i = 0;
|
||||
int c;
|
||||
for (; i < n; i++) {
|
||||
c = read();
|
||||
if (c == -1) {
|
||||
throw new EOFException();
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
private static byte decode(char c, int shift) {
|
||||
return (byte) ((((c >= '0' && c <= '9') ? c - '0' : (c >= 'A' && c <= 'F') ? c - 'A' + 10
|
||||
: (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1) & 0xf) << shift);
|
||||
}
|
||||
|
||||
private static byte decode(char c1, char c2) {
|
||||
return (byte) (decode(c1, 4) | decode(c2, 0));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,166 +0,0 @@
|
|||
package org.xbib.content.resource.url;
|
||||
|
||||
import static java.nio.charset.CodingErrorAction.REPLACE;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* See RFC 3986, RFC 1738 and <a href="http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding">Lunatech research</a>.
|
||||
*/
|
||||
public final class UrlPercentEncoders {
|
||||
|
||||
/**
|
||||
* An encoder for RFC 3986 reg-names.
|
||||
*/
|
||||
|
||||
private static final BitSet REG_NAME_BIT_SET = new BitSet();
|
||||
|
||||
private static final BitSet PATH_BIT_SET = new BitSet();
|
||||
private static final BitSet MATRIX_BIT_SET = new BitSet();
|
||||
private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet();
|
||||
private static final BitSet QUERY_PARAM_BIT_SET = new BitSet();
|
||||
private static final BitSet FRAGMENT_BIT_SET = new BitSet();
|
||||
|
||||
static {
|
||||
// RFC 3986 'reg-name'. This is not very aggressive... it's quite possible to have DNS-illegal names out of this.
|
||||
// Regardless, it will at least be URI-compliant even if it's not HTTP URL-compliant.
|
||||
addUnreserved(REG_NAME_BIT_SET);
|
||||
addSubdelims(REG_NAME_BIT_SET);
|
||||
|
||||
// Represents RFC 3986 'pchar'. Remove delimiter that starts matrix section.
|
||||
addPChar(PATH_BIT_SET);
|
||||
PATH_BIT_SET.clear((int) ';');
|
||||
|
||||
// Remove delims for HTTP matrix params as per RFC 1738 S3.3. The other reserved chars ('/' and '?')
|
||||
// are already excluded.
|
||||
addPChar(MATRIX_BIT_SET);
|
||||
MATRIX_BIT_SET.clear((int) ';');
|
||||
MATRIX_BIT_SET.clear((int) '=');
|
||||
|
||||
/*
|
||||
* At this point it represents RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also
|
||||
* specifies that "+" can mean space in a query, so we will make sure to say that '+' is not safe to leave as-is
|
||||
*/
|
||||
addQuery(UNSTRUCTURED_QUERY_BIT_SET);
|
||||
UNSTRUCTURED_QUERY_BIT_SET.clear((int) '+');
|
||||
|
||||
/*
|
||||
* Create more stringent requirements for HTML4 queries: remove delimiters for HTML query params so that key=value
|
||||
* pairs can be used.
|
||||
*/
|
||||
QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET);
|
||||
QUERY_PARAM_BIT_SET.clear((int) '=');
|
||||
QUERY_PARAM_BIT_SET.clear((int) '&');
|
||||
|
||||
addFragment(FRAGMENT_BIT_SET);
|
||||
}
|
||||
|
||||
private UrlPercentEncoders() {
|
||||
}
|
||||
|
||||
public static PercentEncoder getRegNameEncoder() {
|
||||
return new PercentEncoder(REG_NAME_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
}
|
||||
|
||||
public static PercentEncoder getPathEncoder() {
|
||||
return new PercentEncoder(PATH_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
}
|
||||
|
||||
public static PercentEncoder getMatrixEncoder() {
|
||||
return new PercentEncoder(MATRIX_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
}
|
||||
|
||||
public static PercentEncoder getUnstructuredQueryEncoder() {
|
||||
return new PercentEncoder(UNSTRUCTURED_QUERY_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
}
|
||||
|
||||
public static PercentEncoder getQueryParamEncoder() {
|
||||
return new PercentEncoder(QUERY_PARAM_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
}
|
||||
|
||||
public static PercentEncoder getFragmentEncoder() {
|
||||
return new PercentEncoder(FRAGMENT_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Add code points for 'fragment' chars.
|
||||
*
|
||||
* @param fragmentBitSet bit set
|
||||
*/
|
||||
private static void addFragment(BitSet fragmentBitSet) {
|
||||
addPChar(fragmentBitSet);
|
||||
fragmentBitSet.set((int) '/');
|
||||
fragmentBitSet.set((int) '?');
|
||||
}
|
||||
|
||||
/**
|
||||
* Add code points for 'query' chars.
|
||||
*
|
||||
* @param queryBitSet bit set
|
||||
*/
|
||||
private static void addQuery(BitSet queryBitSet) {
|
||||
addPChar(queryBitSet);
|
||||
queryBitSet.set((int) '/');
|
||||
queryBitSet.set((int) '?');
|
||||
}
|
||||
|
||||
/**
|
||||
* Add code points for 'pchar' chars.
|
||||
*
|
||||
* @param bs bitset
|
||||
*/
|
||||
private static void addPChar(BitSet bs) {
|
||||
addUnreserved(bs);
|
||||
addSubdelims(bs);
|
||||
bs.set((int) ':');
|
||||
bs.set((int) '@');
|
||||
}
|
||||
|
||||
/**
|
||||
* Add codepoints for 'unreserved' chars.
|
||||
*
|
||||
* @param bs bitset to add codepoints to
|
||||
*/
|
||||
private static void addUnreserved(BitSet bs) {
|
||||
|
||||
for (int i = 'a'; i <= 'z'; i++) {
|
||||
bs.set(i);
|
||||
}
|
||||
for (int i = 'A'; i <= 'Z'; i++) {
|
||||
bs.set(i);
|
||||
}
|
||||
for (int i = '0'; i <= '9'; i++) {
|
||||
bs.set(i);
|
||||
}
|
||||
bs.set((int) '-');
|
||||
bs.set((int) '.');
|
||||
bs.set((int) '_');
|
||||
bs.set((int) '~');
|
||||
}
|
||||
|
||||
/**
|
||||
* Add codepoints for 'sub-delims' chars.
|
||||
*
|
||||
* @param bs bitset to add codepoints to
|
||||
*/
|
||||
private static void addSubdelims(BitSet bs) {
|
||||
bs.set((int) '!');
|
||||
bs.set((int) '$');
|
||||
bs.set((int) '&');
|
||||
bs.set((int) '\'');
|
||||
bs.set((int) '(');
|
||||
bs.set((int) ')');
|
||||
bs.set((int) '*');
|
||||
bs.set((int) '+');
|
||||
bs.set((int) ',');
|
||||
bs.set((int) ';');
|
||||
bs.set((int) '=');
|
||||
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
/**
|
||||
* Classes for URL encoding and decoding.
|
||||
*/
|
||||
package org.xbib.content.resource.url;
|
|
@ -1,87 +0,0 @@
|
|||
package org.xbib.content.resource.url;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static java.nio.charset.CodingErrorAction.REPLACE;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.MalformedInputException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.charset.UnmappableCharacterException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public final class PercentEncoderTest {
|
||||
|
||||
private PercentEncoder alnum;
|
||||
private PercentEncoder alnum16;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
BitSet bs = new BitSet();
|
||||
for (int i = 'a'; i <= 'z'; i++) {
|
||||
bs.set(i);
|
||||
}
|
||||
for (int i = 'A'; i <= 'Z'; i++) {
|
||||
bs.set(i);
|
||||
}
|
||||
for (int i = '0'; i <= '9'; i++) {
|
||||
bs.set(i);
|
||||
}
|
||||
|
||||
this.alnum = new PercentEncoder(bs, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
this.alnum16 = new PercentEncoder(bs, StandardCharsets.UTF_16BE.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDoesntEncodeSafe() throws CharacterCodingException {
|
||||
BitSet set = new BitSet();
|
||||
for (int i = 'a'; i <= 'z'; i++) {
|
||||
set.set(i);
|
||||
}
|
||||
|
||||
PercentEncoder pe = new PercentEncoder(set, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
|
||||
.onUnmappableCharacter(REPLACE));
|
||||
assertEquals("abcd%41%42%43%44", pe.encode("abcdABCD"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodeInBetweenSafe() throws MalformedInputException, UnmappableCharacterException {
|
||||
assertEquals("abc%20123", alnum.encode("abc 123"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSafeInBetweenEncoded() throws MalformedInputException, UnmappableCharacterException {
|
||||
assertEquals("%20abc%20", alnum.encode(" abc "));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodeUtf8() throws CharacterCodingException {
|
||||
// 1 UTF-16 char (unicode snowman)
|
||||
assertEquals("snowman%E2%98%83", alnum.encode("snowman\u2603"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodeUtf8SurrogatePair() throws CharacterCodingException {
|
||||
// musical G clef: 1d11e, has to be represented in surrogate pair form
|
||||
assertEquals("clef%F0%9D%84%9E", alnum.encode("clef\ud834\udd1e"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodeUtf16() throws CharacterCodingException {
|
||||
// 1 UTF-16 char (unicode snowman)
|
||||
assertEquals("snowman%26%03", alnum16.encode("snowman\u2603"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUrlEncodedUtf16SurrogatePair() throws CharacterCodingException {
|
||||
// musical G clef: 1d11e, has to be represented in surrogate pair form
|
||||
assertEquals("clef%D8%34%DD%1E", alnum16.encode("clef\ud834\udd1e"));
|
||||
}
|
||||
}
|
|
@ -1,433 +0,0 @@
|
|||
package org.xbib.content.resource.url;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.xbib.content.resource.url.UrlBuilder.forHost;
|
||||
import static org.xbib.content.resource.url.UrlBuilder.fromUrl;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public final class UrlBuilderTest {
|
||||
|
||||
private static void assertUrlEquals(String expected, String actual)
|
||||
throws URISyntaxException, MalformedURLException {
|
||||
assertEquals(expected, actual);
|
||||
assertEquals(expected, new URI(actual).toString());
|
||||
assertEquals(expected, new URL(actual).toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoUrlParts() throws Exception {
|
||||
assertUrlEquals("http://foo.com", forHost("http", "foo.com").toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithPort() throws Exception {
|
||||
assertUrlEquals("http://foo.com:33", forHost("http", "foo.com", 33).toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimplePath() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.pathSegment("seg1").pathSegment("seg2");
|
||||
assertUrlEquals("http://foo.com/seg1/seg2", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPathWithReserved() throws Exception {
|
||||
// RFC 1738 S3.3
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.pathSegment("seg/;?ment").pathSegment("seg=&2");
|
||||
assertUrlEquals("http://foo.com/seg%2F%3B%3Fment/seg=&2", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPathSegments() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.pathSegments("seg1", "seg2", "seg3");
|
||||
assertUrlEquals("http://foo.com/seg1/seg2/seg3", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatrixWithoutPathHasLeadingSlash() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.matrixParam("foo", "bar");
|
||||
assertUrlEquals("http://foo.com/;foo=bar", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatrixWithReserved() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com")
|
||||
.pathSegment("foo")
|
||||
.matrixParam("foo", "bar")
|
||||
.matrixParam("res;=?#/erved", "value")
|
||||
.pathSegment("baz");
|
||||
assertUrlEquals("http://foo.com/foo;foo=bar;res%3B%3D%3F%23%2Ferved=value/baz", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUrlEncodedPathSegmentUtf8() throws Exception {
|
||||
// 1 UTF-16 char
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.pathSegment("snowman").pathSegment("\u2603");
|
||||
assertUrlEquals("http://foo.com/snowman/%E2%98%83", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUrlEncodedPathSegmentUtf8SurrogatePair() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
// musical G clef: 1d11e, has to be represented in surrogate pair form
|
||||
ub.pathSegment("clef").pathSegment("\ud834\udd1e");
|
||||
assertUrlEquals("http://foo.com/clef/%F0%9D%84%9E", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQueryParamNoPath() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.queryParam("foo", "bar");
|
||||
String s = ub.toUrlString();
|
||||
assertUrlEquals("http://foo.com?foo=bar", s);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQueryParamsDuplicated() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.queryParam("foo", "bar");
|
||||
ub.queryParam("foo", "bar2");
|
||||
ub.queryParam("baz", "quux");
|
||||
ub.queryParam("baz", "quux2");
|
||||
assertUrlEquals("http://foo.com?foo=bar&foo=bar2&baz=quux&baz=quux2", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodeQueryParams() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.queryParam("foo", "bar&=#baz");
|
||||
ub.queryParam("foo", "bar?/2");
|
||||
assertUrlEquals("http://foo.com?foo=bar%26%3D%23baz&foo=bar?/2", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodeQueryParamWithSpaceAndPlus() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.queryParam("foo", "spa ce");
|
||||
ub.queryParam("fo+o", "plus+");
|
||||
assertUrlEquals("http://foo.com?foo=spa%20ce&fo%2Bo=plus%2B", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPlusInVariousParts() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
|
||||
ub.pathSegment("has+plus")
|
||||
.matrixParam("plusMtx", "pl+us")
|
||||
.queryParam("plusQp", "pl+us")
|
||||
.fragment("plus+frag");
|
||||
|
||||
assertUrlEquals("http://foo.com/has+plus;plusMtx=pl+us?plusQp=pl%2Bus#plus+frag", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFragment() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "foo.com");
|
||||
ub.queryParam("foo", "bar");
|
||||
ub.fragment("#frag/?");
|
||||
assertUrlEquals("http://foo.com?foo=bar#%23frag/?", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllParts() throws Exception {
|
||||
UrlBuilder ub = forHost("https", "foo.bar.com", 3333);
|
||||
ub.pathSegment("foo");
|
||||
ub.pathSegment("bar");
|
||||
ub.matrixParam("mtx1", "val1");
|
||||
ub.matrixParam("mtx2", "val2");
|
||||
ub.queryParam("q1", "v1");
|
||||
ub.queryParam("q2", "v2");
|
||||
ub.fragment("zomg it's a fragment");
|
||||
|
||||
assertEquals("https://foo.bar.com:3333/foo/bar;mtx1=val1;mtx2=val2?q1=v1&q2=v2#zomg%20it's%20a%20fragment",
|
||||
ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIPv4Literal() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "127.0.0.1");
|
||||
assertUrlEquals("http://127.0.0.1", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadIPv4LiteralDoesntChoke() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "300.100.50.1");
|
||||
assertUrlEquals("http://300.100.50.1", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIPv6LiteralLocalhost() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "[::1]");
|
||||
assertUrlEquals("http://[::1]", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIPv6Literal() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "[2001:db8:85a3::8a2e:370:7334]");
|
||||
assertUrlEquals("http://[2001:db8:85a3::8a2e:370:7334]", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodedRegNameSingleByte() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "host?name;");
|
||||
assertUrlEquals("http://host%3Fname;", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEncodedRegNameMultiByte() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "snow\u2603man");
|
||||
assertUrlEquals("http://snow%E2%98%83man", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForceTrailingSlash() throws Exception {
|
||||
UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c");
|
||||
|
||||
assertUrlEquals("https://foo.com/a/b/c/", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForceTrailingSlashWithQueryParams() throws Exception {
|
||||
UrlBuilder ub =
|
||||
forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c").queryParam("foo", "bar");
|
||||
|
||||
assertUrlEquals("https://foo.com/a/b/c/?foo=bar", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForceTrailingSlashNoPathSegmentsWithMatrixParams() throws Exception {
|
||||
UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().matrixParam("m1", "v1");
|
||||
|
||||
assertUrlEquals("https://foo.com/;m1=v1/", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntermingledMatrixParamsAndPathSegments() throws Exception {
|
||||
|
||||
UrlBuilder ub = forHost("http", "foo.com")
|
||||
.pathSegments("seg1", "seg2")
|
||||
.matrixParam("m1", "v1")
|
||||
.pathSegment("seg3")
|
||||
.matrixParam("m2", "v2");
|
||||
|
||||
assertUrlEquals("http://foo.com/seg1/seg2;m1=v1/seg3;m2=v2", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEverything() throws Exception {
|
||||
String orig =
|
||||
"https://foo.bar.com:33/foo/ba%20r;mtx1=val1;mtx2=val%202/seg%203;m2=v2?q1=v1&q2=v%202#zomg%20it's%20a%20fragm";
|
||||
assertUrlBuilderRoundtrip(orig);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEmptyPath() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEmptyPathAndSlash() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/", "http://foo.com");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithPort() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com:1234");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEmptyPathSegent() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo//", "http://foo.com/foo");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEncodedHost() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://f%20oo.com/bar");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEncodedPathSegment() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo/b%20ar");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEncodedMatrixParam() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo;m1=v1;m%202=v%202");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEncodedQueryParam() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo?q%201=v%202&q2=v2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEncodedQueryParamDelimiter() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=%3Dv1&%26q2=v2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEncodedFragment() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo#b%20ar");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithMalformedMatrixPair() throws Exception {
|
||||
try {
|
||||
fromUrl(new URL("http://foo.com/foo;m1=v1=v2"));
|
||||
fail();
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("Malformed matrix param: <m1=v1=v2>", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEmptyPathSegmentWithMatrixParams() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo/;m1=v1");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEmptyPathWithMatrixParams() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/;m1=v1");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithEmptyPathWithMultipleMatrixParams() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/;m1=v1;m2=v2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlWithPathSegmentEndingWithSemicolon() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo;", "http://foo.com/foo");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPercentDecodeInvalidPair() throws MalformedURLException, CharacterCodingException {
|
||||
try {
|
||||
fromUrl(new URL("http://foo.com/fo%2o"));
|
||||
fail();
|
||||
} catch (IllegalArgumentException e) {
|
||||
assertEquals("Invalid %-tuple <%2o>", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlMalformedQueryParamMultiValues() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1=v2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlMalformedQueryParamNoValue() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1&q2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFromUrlUnstructuredQueryWithEscapedChars() throws Exception {
|
||||
assertUrlBuilderRoundtrip("http://foo.com/foo?query==&%23");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCantUseQueryParamAfterQuery() {
|
||||
UrlBuilder ub = forHost("http", "foo.com").unstructuredQuery("q");
|
||||
|
||||
try {
|
||||
ub.queryParam("foo", "bar");
|
||||
fail();
|
||||
} catch (IllegalStateException e) {
|
||||
assertEquals("Cannot call queryParam() when this already has an unstructured query specified",
|
||||
e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCantUseQueryAfterQueryParam() {
|
||||
UrlBuilder ub = forHost("http", "foo.com").queryParam("foo", "bar");
|
||||
|
||||
try {
|
||||
ub.unstructuredQuery("q");
|
||||
|
||||
fail();
|
||||
} catch (IllegalStateException e) {
|
||||
assertEquals("Cannot call unstructuredQuery() when this already has queryParam pairs specified",
|
||||
e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnstructuredQueryWithNoSpecialChars() throws Exception {
|
||||
assertUrlEquals("http://foo.com?q", forHost("http", "foo.com").unstructuredQuery("q").toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnstructuredQueryWithOkSpecialChars() throws Exception {
|
||||
assertUrlEquals("http://foo.com?q?/&=", forHost("http", "foo.com").unstructuredQuery("q?/&=").toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnstructuredQueryWithEscapedSpecialChars() throws Exception {
|
||||
assertUrlEquals("http://foo.com?q%23%2B", forHost("http", "foo.com").unstructuredQuery("q#+").toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClearQueryRemovesQueryParam() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "host")
|
||||
.queryParam("foo", "bar")
|
||||
.clearQuery();
|
||||
assertUrlEquals("http://host", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClearQueryRemovesUnstructuredQuery() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "host")
|
||||
.unstructuredQuery("foobar")
|
||||
.clearQuery();
|
||||
assertUrlEquals("http://host", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClearQueryAfterQueryParamAllowsQuery() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "host")
|
||||
.queryParam("foo", "bar")
|
||||
.clearQuery()
|
||||
.unstructuredQuery("foobar");
|
||||
assertUrlEquals("http://host?foobar", ub.toUrlString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClearQueryAfterQueryAllowsQueryParam() throws Exception {
|
||||
UrlBuilder ub = forHost("http", "host")
|
||||
.unstructuredQuery("foobar")
|
||||
.clearQuery()
|
||||
.queryParam("foo", "bar");
|
||||
assertUrlEquals("http://host?foo=bar", ub.toUrlString());
|
||||
}
|
||||
|
||||
private void assertUrlBuilderRoundtrip(String url)
|
||||
throws MalformedURLException, CharacterCodingException, URISyntaxException {
|
||||
assertUrlBuilderRoundtrip(url, url);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param origUrl the url that will be used to create a URL
|
||||
* @param finalUrl the URL string it should end up as
|
||||
*/
|
||||
private void assertUrlBuilderRoundtrip(String origUrl, String finalUrl)
|
||||
throws MalformedURLException, CharacterCodingException, URISyntaxException {
|
||||
assertUrlEquals(finalUrl, fromUrl(new URL(origUrl)).toUrlString());
|
||||
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
/**
|
||||
* Classes for testing URL processing.
|
||||
*/
|
||||
package org.xbib.content.resource.url;
|
|
@ -1,4 +1,4 @@
|
|||
dependencies {
|
||||
compile project(':content-core')
|
||||
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-smile:${versions.jackson}"
|
||||
}
|
||||
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-smile:${project.property('jackson.version')}"
|
||||
}
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
dependencies {
|
||||
compile project(':content-core')
|
||||
compile project(':content-resource')
|
||||
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-xml:${versions.jackson}"
|
||||
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-xml:${project.property('jackson.version')}"
|
||||
}
|
||||
|
||||
|
||||
tasks.withType(JavaCompile) {
|
||||
options.compilerArgs << "-Xlint:all" << "-profile" << "compact2"
|
||||
}
|
|
@ -206,12 +206,12 @@ public class XmlXContentGenerator extends AbstractXContentGenerator {
|
|||
|
||||
@Override
|
||||
public void writeString(String text) throws IOException {
|
||||
generator.writeString(XMLUtil.sanitizeXml10(text));
|
||||
generator.writeString(XMLUtil.sanitize(text));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeString(char[] text, int offset, int len) throws IOException {
|
||||
generator.writeString(XMLUtil.sanitizeXml10(text, offset, len));
|
||||
generator.writeString(XMLUtil.sanitize(new String(text, offset, len)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -265,10 +265,24 @@ public final class XMLUtil {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String sanitizeToLineFeed(CharSequence string) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0, len = string.length(); i < len; i++) {
|
||||
char c = string.charAt(i);
|
||||
boolean legal = c == '\u0009' || c == '\n'
|
||||
|| (c >= '\u0020' && c <= '\uD7FF')
|
||||
|| (c >= '\uE000' && c <= '\uFFFD');
|
||||
if (legal) {
|
||||
sb.append(c);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Does not work.
|
||||
* The pattern matching does not work.
|
||||
*
|
||||
* @param sequence the charatcer sequence
|
||||
* @param sequence the character sequence
|
||||
* @return sanitized string
|
||||
*/
|
||||
public static String sanitizeXml10(CharSequence sequence) {
|
||||
|
|
|
@ -189,7 +189,7 @@ public class XContentXmlBuilderTest extends Assert {
|
|||
QName root = new QName("root");
|
||||
XContentBuilder builder = XmlXContent.contentBuilder(new XmlXParams(root));
|
||||
builder.startObject().field("Hello", "World\u001b").endObject();
|
||||
assertEquals("<root><Hello>World\ufffd</Hello></root>", builder.string());
|
||||
assertEquals("<root><Hello>World</Hello></root>", builder.string());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
dependencies {
|
||||
compile project(':content-core')
|
||||
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${versions.jackson}"
|
||||
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${project.property('jackson.version')}"
|
||||
}
|
|
@ -1,3 +1,6 @@
|
|||
group = org.xbib
|
||||
name = content
|
||||
version = 1.0.7
|
||||
version = 1.1.0
|
||||
|
||||
jackson.version = 2.8.4
|
||||
xbib-net.version = 1.0.0
|
||||
|
|
|
@ -6,7 +6,7 @@ task xbibUpload(type: Upload, dependsOn: build) {
|
|||
if (project.hasProperty('xbibUsername')) {
|
||||
mavenDeployer {
|
||||
configuration = configurations.wagon
|
||||
repository(url: uri('scpexe://xbib.org/repository')) {
|
||||
repository(url: uri('sftp://xbib.org/repository')) {
|
||||
authentication(userName: xbibUsername, privateKey: xbibPrivateKey)
|
||||
}
|
||||
}
|
||||
|
@ -64,3 +64,7 @@ task sonatypeUpload(type: Upload, dependsOn: build) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
nexusStaging {
|
||||
packageGroup = "org.xbib"
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
tasks.withType(FindBugs) {
|
||||
ignoreFailures = true
|
||||
reports {
|
||||
xml.enabled = true
|
||||
html.enabled = false
|
||||
xml.enabled = false
|
||||
html.enabled = true
|
||||
}
|
||||
}
|
||||
tasks.withType(Pmd) {
|
||||
|
@ -22,10 +22,8 @@ tasks.withType(Checkstyle) {
|
|||
|
||||
jacocoTestReport {
|
||||
reports {
|
||||
xml.enabled true
|
||||
csv.enabled false
|
||||
xml.destination "${buildDir}/reports/jacoco-xml"
|
||||
html.destination "${buildDir}/reports/jacoco-html"
|
||||
xml.enabled = true
|
||||
csv.enabled = false
|
||||
}
|
||||
}
|
||||
|
||||
|
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
4
gradle/wrapper/gradle-wrapper.properties
vendored
4
gradle/wrapper/gradle-wrapper.properties
vendored
|
@ -1,6 +1,6 @@
|
|||
#Sat Dec 03 23:47:13 CET 2016
|
||||
#Mon Aug 14 19:27:00 CEST 2017
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip
|
||||
|
|
6
gradlew
vendored
6
gradlew
vendored
|
@ -33,11 +33,11 @@ DEFAULT_JVM_OPTS=""
|
|||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD="maximum"
|
||||
|
||||
warn ( ) {
|
||||
warn () {
|
||||
echo "$*"
|
||||
}
|
||||
|
||||
die ( ) {
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
|
@ -155,7 +155,7 @@ if $cygwin ; then
|
|||
fi
|
||||
|
||||
# Escape application args
|
||||
save ( ) {
|
||||
save () {
|
||||
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
||||
echo " "
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue