add xbib net dependency, update to Gradle 4.1, add some convenience methods

This commit is contained in:
Jörg Prante 2017-08-14 23:00:13 +02:00
parent 73afbd806f
commit 14643b140f
46 changed files with 347 additions and 2019 deletions

View file

@ -1,16 +1,25 @@
plugins { plugins {
id "org.sonarqube" version "2.2" id "org.sonarqube" version "2.5"
id "org.ajoberstar.github-pages" version "1.6.0-rc.1" id "org.xbib.gradle.plugin.asciidoctor" version "1.5.4.1.0"
id "org.xbib.gradle.plugin.jbake" version "1.2.1" id "io.codearte.nexus-staging" version "0.7.0"
} }
ext { printf "Host: %s\nOS: %s %s %s\nJVM: %s %s %s %s\nGroovy: %s\nGradle: %s\n" +
versions = [ "Build: group: ${project.group} name: ${project.name} version: ${project.version}\n",
'jackson' : '2.8.4' InetAddress.getLocalHost(),
] System.getProperty("os.name"),
} System.getProperty("os.arch"),
System.getProperty("os.version"),
System.getProperty("java.version"),
System.getProperty("java.vm.version"),
System.getProperty("java.vm.vendor"),
System.getProperty("java.vm.name"),
GroovySystem.getVersion(),
gradle.gradleVersion
apply plugin: 'build-dashboard' apply plugin: 'build-dashboard'
apply plugin: "io.codearte.nexus-staging"
allprojects { allprojects {
@ -21,18 +30,20 @@ allprojects {
apply plugin: 'pmd' apply plugin: 'pmd'
apply plugin: 'checkstyle' apply plugin: 'checkstyle'
apply plugin: "jacoco" apply plugin: "jacoco"
apply plugin: 'org.xbib.gradle.plugin.asciidoctor'
repositories { repositories {
mavenCentral() mavenCentral()
} }
configurations { configurations {
asciidoclet
wagon wagon
} }
dependencies { dependencies {
testCompile 'junit:junit:4.12' testCompile 'junit:junit:4.12'
wagon 'org.apache.maven.wagon:wagon-ssh-external:2.10' wagon 'org.apache.maven.wagon:wagon-ssh:2.12'
} }
sourceCompatibility = JavaVersion.VERSION_1_8 sourceCompatibility = JavaVersion.VERSION_1_8
@ -43,6 +54,12 @@ allprojects {
options.compilerArgs << "-Xlint:all" << "-profile" << "compact1" options.compilerArgs << "-Xlint:all" << "-profile" << "compact1"
} }
jar {
manifest {
attributes('Implementation-Version': project.version)
}
}
test { test {
testLogging { testLogging {
showStandardStreams = false showStandardStreams = false
@ -50,6 +67,10 @@ allprojects {
} }
} }
clean {
delete 'out'
}
task sourcesJar(type: Jar, dependsOn: classes) { task sourcesJar(type: Jar, dependsOn: classes) {
classifier 'sources' classifier 'sources'
from sourceSets.main.allSource from sourceSets.main.allSource

View file

@ -1,3 +1,3 @@
dependencies { dependencies {
compile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" compile "com.fasterxml.jackson.core:jackson-core:${project.property('jackson.version')}"
} }

View file

@ -2,6 +2,8 @@ package org.xbib.content;
import org.xbib.content.io.BytesReference; import org.xbib.content.io.BytesReference;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.math.BigDecimal; import java.math.BigDecimal;
@ -10,7 +12,7 @@ import java.math.BigInteger;
/** /**
* *
*/ */
public interface XContentGenerator { public interface XContentGenerator extends Flushable, Closeable {
XContent content(); XContent content();
@ -115,7 +117,4 @@ public interface XContentGenerator {
void copyCurrentStructure(XContentParser parser) throws IOException; void copyCurrentStructure(XContentParser parser) throws IOException;
void flush() throws IOException;
void close() throws IOException;
} }

View file

@ -40,6 +40,14 @@ public class BytesArray implements BytesReference {
this.length = length; this.length = length;
} }
/**
 * Appends the given bytes to the content of this array.
 *
 * A new backing array is allocated that holds the currently referenced
 * content followed by {@code b}; afterwards the offset is reset to 0 and the
 * length covers the whole new array.
 *
 * @param b the bytes to append; must not be {@code null}
 */
public void write(byte[] b) {
    byte[] c = new byte[length + b.length];
    // Copy only the referenced content. Starting at 'offset' (rather than 0) keeps
    // this correct when the instance refers to a slice of a larger backing array.
    System.arraycopy(bytes, offset, c, 0, length);
    // Append right behind the copied content. Using bytes.length here would leave a
    // gap, or overflow 'c', whenever the backing array is larger than 'length'.
    System.arraycopy(b, 0, c, length, b.length);
    this.bytes = c;
    this.offset = 0;
    this.length = c.length;
}
@Override @Override
public byte get(int index) { public byte get(int index) {

View file

@ -1,7 +1,10 @@
package org.xbib.content.settings; package org.xbib.content.settings;
import org.xbib.content.XContent; import org.xbib.content.XContent;
import org.xbib.content.XContentGenerator;
import org.xbib.content.XContentParser; import org.xbib.content.XContentParser;
import org.xbib.content.io.BytesReference;
import org.xbib.content.io.BytesStreamOutput;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@ -24,6 +27,32 @@ public abstract class AbstractSettingsLoader implements SettingsLoader {
} }
} }
/**
 * Loads settings from a bytes reference.
 *
 * A parser for this loader's content type is created on the given bytes,
 * handed to {@link #load(XContentParser)} and closed afterwards.
 *
 * @param bytesReference the bytes to parse
 * @return the map produced by {@link #load(XContentParser)}
 * @throws IOException if creating the parser or loading fails
 */
public Map<String, String> load(BytesReference bytesReference) throws IOException {
    try (XContentParser xContentParser = content().createParser(bytesReference)) {
        Map<String, String> settings = load(xContentParser);
        return settings;
    }
}
/**
 * Loads settings from the given bytes and renders the resulting map as a
 * single object in this loader's content format.
 *
 * Every map entry becomes one field of the object; entries with a
 * {@code null} value are written as null fields.
 *
 * @param bytesReference the bytes to parse
 * @return the generated content, as returned by {@code toUtf8()} on the output bytes
 * @throws IOException if parsing or generating fails
 */
public String flatMapAsString(BytesReference bytesReference) throws IOException {
    try (XContentParser source = content().createParser(bytesReference);
         BytesStreamOutput sink = new BytesStreamOutput();
         XContentGenerator out = content().createGenerator(sink)) {
        out.writeStartObject();
        Map<String, String> settings = load(source);
        for (Map.Entry<String, String> setting : settings.entrySet()) {
            out.writeFieldName(setting.getKey());
            String text = setting.getValue();
            if (text != null) {
                out.writeString(text);
            } else {
                out.writeNull();
            }
        }
        out.writeEndObject();
        // push everything into the sink before its bytes are read
        out.flush();
        return sink.bytes().toUtf8();
    }
}
public Map<String, String> load(XContentParser xContentParser) throws IOException { public Map<String, String> load(XContentParser xContentParser) throws IOException {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
Map<String, String> map = new HashMap<>(); Map<String, String> map = new HashMap<>();

View file

@ -1,10 +1,10 @@
package org.xbib.content.settings; package org.xbib.content.settings;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import org.xbib.content.XContentBuilder;
import org.xbib.content.XContentHelper; import org.xbib.content.XContentHelper;
import org.xbib.content.io.BytesArray;
import org.xbib.content.io.BytesReference;
import org.xbib.content.json.JsonSettingsLoader; import org.xbib.content.json.JsonSettingsLoader;
import org.xbib.content.json.JsonXContent; import org.xbib.content.json.JsonXContent;
@ -123,4 +123,12 @@ public class SettingsTest extends Assert {
assertEquals("{\"a.b\":\"c\"}", result); assertEquals("{\"a.b\":\"c\"}", result);
} }
/**
 * A nested JSON object must come back as one flat object with dotted keys.
 */
@Test
public void testFlatMapAsString() throws IOException {
    byte[] nestedJson = "{\"a\":{\"b\":\"c\"}}".getBytes(StandardCharsets.UTF_8);
    BytesReference input = new BytesArray(nestedJson);
    String flattened = new JsonSettingsLoader().flatMapAsString(input);
    assertEquals("{\"a.b\":\"c\"}", flattened);
}
} }

View file

@ -1,5 +1,5 @@
dependencies { dependencies {
compile "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}" compile "com.fasterxml.jackson.core:jackson-databind:${project.property('jackson.version')}"
testCompile('junit:junit:4.12') { testCompile('junit:junit:4.12') {
exclude group: 'org.hamcrest' exclude group: 'org.hamcrest'
} }

View file

@ -5,6 +5,7 @@ import org.xbib.content.rdf.RdfContentParams;
import org.xbib.content.rdf.Resource; import org.xbib.content.rdf.Resource;
import org.xbib.content.rdf.internal.DefaultAnonymousResource; import org.xbib.content.rdf.internal.DefaultAnonymousResource;
import org.xbib.content.resource.IRI; import org.xbib.content.resource.IRI;
import org.xbib.content.xml.util.XMLUtil;
import org.xml.sax.Attributes; import org.xml.sax.Attributes;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.DefaultHandler;
@ -185,7 +186,7 @@ public abstract class AbstractXmlHandler<P extends RdfContentParams>
} }
public String content() { public String content() {
String s = content.toString().trim(); String s = XMLUtil.sanitizeToLineFeed(content.toString()).trim();
return s.length() > 0 ? s : null; return s.length() > 0 ? s : null;
} }

View file

@ -5,6 +5,7 @@ import org.xbib.content.rdf.RdfContentParams;
import org.xbib.content.rdf.RdfContentParser; import org.xbib.content.rdf.RdfContentParser;
import org.xbib.content.rdf.RdfContentType; import org.xbib.content.rdf.RdfContentType;
import org.xbib.content.rdf.StandardRdfContentType; import org.xbib.content.rdf.StandardRdfContentType;
import org.xbib.content.rdf.util.NormalizeEolFilter;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import org.xml.sax.XMLReader; import org.xml.sax.XMLReader;
@ -37,7 +38,7 @@ public class XmlContentParser<P extends RdfContentParams> implements RdfContentP
} }
public XmlContentParser(Reader reader) { public XmlContentParser(Reader reader) {
this.reader = reader; this.reader = new NormalizeEolFilter(reader, System.getProperty("line.separator"), true);
} }
@Override @Override

View file

@ -0,0 +1,86 @@
package org.xbib.content.rdf.util;
import java.io.IOException;
import java.io.Reader;
/**
 * Reader filter that replaces the line endings read from the wrapped reader
 * with a fixed end-of-line sequence.
 *
 * Recognized line endings are "\n", "\r", "\r\n" and "\r\r\n"; each of them is
 * replaced by one copy of the configured sequence. Optionally a line ending is
 * added at the end of input when the input does not already end with one.
 * Replacement characters are pushed onto the push-back stack of
 * {@link SimpleFilterReader} and handed out by subsequent reads.
 */
public class NormalizeEolFilter extends SimpleFilterReader {
// true while the last thing produced was a normalized line ending
private boolean previousWasEOL;
// whether to add a line ending at end of input if one is missing
private boolean fixLast;
// number of pushed-back replacement characters that still have to be passed through unchanged
private int normalizedEOL = 0;
// the replacement end-of-line sequence
private char[] eol = null;
/**
 * @param in the reader to filter
 * @param eolString the sequence every line ending is replaced with
 * @param fixLast if true, a line ending is added at end of input when the input does not end with one
 */
public NormalizeEolFilter(Reader in, String eolString, boolean fixLast) {
super(in);
eol = eolString.toCharArray();
this.fixLast = fixLast;
}
/**
 * Reads the next character of the normalized stream.
 *
 * @return the next character, or -1 at end of input
 * @throws IOException if the underlying read fails
 */
public int read() throws IOException {
int thisChar = super.read();
// Only inspect characters while no replacement sequence is pending; pending
// replacement characters must not be normalized a second time.
if (normalizedEOL == 0) {
int numEOL = 0;
boolean atEnd = false;
switch (thisChar) {
case '\u001A':
// Ctrl-Z: look one character ahead to see whether it is the last character of the input
int c = super.read();
if (c == -1) {
atEnd = true;
if (fixLast && !previousWasEOL) {
// re-queue the Ctrl-Z so that the added line ending is read before it
numEOL = 1;
push(thisChar);
}
} else {
push(c);
}
break;
case -1:
// end of input: add the missing final line ending if requested
atEnd = true;
if (fixLast && !previousWasEOL) {
numEOL = 1;
}
break;
case '\n':
numEOL = 1;
break;
case '\r':
// look ahead two characters to tell "\r\r\n", "\r\r", "\r\n" and a lone "\r" apart
numEOL = 1;
int c1 = super.read();
int c2 = super.read();
// when both conditions are false the input was "\r\r\n": one line ending, nothing to push back
if (c1 != '\r' || c2 != '\n') {
if (c1 == '\r') {
// "\r\r" followed by something else: two line endings
numEOL = 2;
push(c2);
} else if (c1 == '\n') {
// "\r\n": one line ending
push(c2);
} else {
// lone "\r": return both look-ahead characters, c1 on top so it is read first
push(c2);
push(c1);
}
}
break;
default:
break;
}
if (numEOL > 0) {
// queue the replacement sequence once per detected line ending
while (numEOL-- > 0) {
push(eol);
normalizedEOL += eol.length;
}
previousWasEOL = true;
// hand out the first queued character instead of the original one
thisChar = read();
} else if (!atEnd) {
previousWasEOL = false;
}
} else {
// this character belongs to a queued replacement sequence
normalizedEOL--;
}
return thisChar;
}
}

View file

@ -0,0 +1,98 @@
package org.xbib.content.rdf.util;
import java.io.IOException;
import java.io.Reader;
/**
 * A reader with a growable push-back stack in front of a wrapped reader.
 *
 * Characters handed to one of the {@code push} methods are returned by
 * {@link #read()} in last-in-first-out order before any further character is
 * taken from the wrapped reader. The array-based read methods are implemented
 * on top of {@link #read()}, so subclasses only need to override that method.
 *
 * {@code mark}, {@code reset} and {@code skip} are forwarded to the wrapped
 * reader and do not take pushed-back characters into account.
 */
public class SimpleFilterReader extends Reader {

    private static final int PREEMPT_BUFFER_LENGTH = 16;

    private final Reader in;

    /** Push-back stack; int-typed so that an end-of-input marker (-1) can be pushed as well. */
    private int[] preempt = new int[PREEMPT_BUFFER_LENGTH];

    /** Number of values currently on the push-back stack. */
    private int preemptIndex = 0;

    /**
     * @param in the reader to wrap
     */
    public SimpleFilterReader(Reader in) {
        this.in = in;
    }

    /**
     * Pushes a character back so that it is returned by the next {@link #read()}.
     *
     * @param c the character to push back
     */
    public void push(char c) {
        push((int) c);
    }

    /**
     * Pushes a value back so that it is returned by the next {@link #read()}.
     *
     * @param c the value to push back; may be -1 to re-queue end of input
     */
    public void push(int c) {
        // Grow before storing. Catching ArrayIndexOutOfBoundsException instead would
        // leave the index already incremented and skip one slot on the retry.
        if (preemptIndex == preempt.length) {
            int[] larger = new int[preempt.length * 2];
            System.arraycopy(preempt, 0, larger, 0, preempt.length);
            preempt = larger;
        }
        preempt[preemptIndex++] = c;
    }

    /**
     * Pushes a range of characters back so that they are read again in their
     * original order.
     *
     * @param cs the characters
     * @param start index of the first character to push
     * @param length number of characters to push
     */
    public void push(char[] cs, int start, int length) {
        // push in reverse so that cs[start] ends up on top of the stack
        for (int i = start + length - 1; i >= start; i--) {
            push(cs[i]);
        }
    }

    /**
     * Pushes all given characters back so that they are read again in their
     * original order.
     *
     * @param cs the characters
     */
    public void push(char[] cs) {
        push(cs, 0, cs.length);
    }

    /**
     * Returns the most recently pushed value if there is one, otherwise the
     * next character of the wrapped reader.
     */
    @Override
    public int read() throws IOException {
        return preemptIndex > 0 ? preempt[--preemptIndex] : in.read();
    }

    @Override
    public void close() throws IOException {
        in.close();
    }

    @Override
    public void reset() throws IOException {
        in.reset();
    }

    @Override
    public boolean markSupported() {
        return in.markSupported();
    }

    /**
     * A pushed-back value can always be read without blocking, so the wrapped
     * reader is only consulted when the push-back stack is empty.
     */
    @Override
    public boolean ready() throws IOException {
        return preemptIndex > 0 || in.ready();
    }

    @Override
    public void mark(int i) throws IOException {
        in.mark(i);
    }

    @Override
    public long skip(long i) throws IOException {
        return in.skip(i);
    }

    @Override
    public int read(char[] buf) throws IOException {
        return read(buf, 0, buf.length);
    }

    /**
     * Fills the buffer by repeatedly calling {@link #read()}.
     *
     * @return the number of characters stored, or -1 if end of input was hit
     *         before any character could be stored
     */
    @Override
    public int read(char[] buf, int start, int length) throws IOException {
        int count = 0;
        int c = 0;
        while (length-- > 0 && (c = this.read()) != -1) {
            buf[start++] = (char) c;
            count++;
        }
        return (count == 0 && c == -1) ? -1 : count;
    }
}

View file

@ -9,12 +9,12 @@ import org.xbib.content.rdf.io.IOTests;
import org.xbib.content.rdf.io.turtle.TurtleContentParams; import org.xbib.content.rdf.io.turtle.TurtleContentParams;
import org.xbib.content.resource.IRI; import org.xbib.content.resource.IRI;
import org.xbib.content.resource.IRINamespaceContext; import org.xbib.content.resource.IRINamespaceContext;
import org.xbib.content.resource.text.CharUtils;
import org.xbib.content.resource.url.UrlEncoding;
import org.xbib.helper.StreamTester; import org.xbib.helper.StreamTester;
import org.xbib.net.PercentEncoders;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
@ -50,8 +50,8 @@ public class OAITest extends StreamTester {
if ("identifier".equals(name.getLocalPart())) { if ("identifier".equals(name.getLocalPart())) {
// make sure we can build an opaque IRI, whatever is out there // make sure we can build an opaque IRI, whatever is out there
try { try {
getResource().setId(IRI.create("id:" getResource().setId(IRI.create("id:" +
+ UrlEncoding.encode(value, CharUtils.Profile.SCHEMESPECIFICPART.filter()))); PercentEncoders.getRegNameEncoder(StandardCharsets.UTF_8).encode(value)));
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
} }
@ -80,8 +80,7 @@ public class OAITest extends StreamTester {
.setDefaultNamespace("oai", "http://www.openarchives.org/OAI/2.0/oai_dc/"); .setDefaultNamespace("oai", "http://www.openarchives.org/OAI/2.0/oai_dc/");
XmlContentParser<TurtleContentParams> parser = new XmlContentParser<>(in); XmlContentParser<TurtleContentParams> parser = new XmlContentParser<>(in);
parser.builder(builder); parser.builder(builder);
parser.setHandler(xmlHandler) parser.setHandler(xmlHandler).parse();
.parse();
assertStream(getClass().getResourceAsStream("oai.ttl"), builder.streamInput()); assertStream(getClass().getResourceAsStream("oai.ttl"), builder.streamInput());
} }
} }

View file

@ -15,12 +15,12 @@ import org.xbib.content.rdf.io.ntriple.NTripleContentParams;
import org.xbib.content.rdf.io.turtle.TurtleContentParams; import org.xbib.content.rdf.io.turtle.TurtleContentParams;
import org.xbib.content.resource.IRI; import org.xbib.content.resource.IRI;
import org.xbib.content.resource.IRINamespaceContext; import org.xbib.content.resource.IRINamespaceContext;
import org.xbib.content.resource.text.CharUtils.Profile;
import org.xbib.content.resource.url.UrlEncoding;
import org.xbib.helper.StreamTester; import org.xbib.helper.StreamTester;
import org.xbib.net.PercentEncoders;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.logging.Level; import java.util.logging.Level;
@ -61,8 +61,8 @@ public class XmlReaderTest extends StreamTester {
if ("identifier".equals(name.getLocalPart()) && DefaultResource.isBlank(getResource())) { if ("identifier".equals(name.getLocalPart()) && DefaultResource.isBlank(getResource())) {
try { try {
// make sure we can build an opaque IRI, whatever is out there // make sure we can build an opaque IRI, whatever is out there
String s = UrlEncoding.encode(value, Profile.SCHEMESPECIFICPART.filter()); getResource().setId(IRI.create("id:" +
getResource().setId(IRI.create("id:" + s)); PercentEncoders.getRegNameEncoder(StandardCharsets.UTF_8).encode(value)));
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
} }
@ -210,7 +210,7 @@ public class XmlReaderTest extends StreamTester {
@Override @Override
public MyBuilder receive(Resource resource) throws IOException { public MyBuilder receive(Resource resource) throws IOException {
resource.triples().forEach(triples::add); triples.addAll(resource.triples());
return this; return this;
} }

View file

@ -1,7 +1,7 @@
@prefix dc: <http://purl.org/dc/elements/1.1/> . @prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix oaidc: <http://www.openarchives.org/OAI/2.0/oai_dc/> . @prefix oaidc: <http://www.openarchives.org/OAI/2.0/oai_dc/> .
<id:http://oro.open.ac.uk/25656/5/butler%2Decoop11.pdf> dc:title "Improving the tokenisation of identifier names"; <id:http%3A%2F%2Foro.open.ac.uk%2F25656%2F5%2Fbutler%252Decoop11.pdf> dc:title "Improving the tokenisation of identifier names";
dc:creator "Butler, Simon", "Wermelinger, Michel", "Yu, Yijun", "Sharp, Helen"; dc:creator "Butler, Simon", "Wermelinger, Michel", "Yu, Yijun", "Sharp, Helen";
dc:description """Identifier names are the main vehicle for semantic information during program comprehension. For tool-supported program comprehension tasks, including concept location and requirements traceability, identifier names need to be tokenised into their semantic constituents. In this paper we present an approach to the automated tokenisation of identifier names that improves on existing techniques in two ways. First, it improves the tokenisation accuracy for single-case identifier names and for identifier names containing digits, which existing techniques largely ignore. Second, performance gains over existing techniques are achieved using smaller oracles, making the approach easier to deploy. dc:description """Identifier names are the main vehicle for semantic information during program comprehension. For tool-supported program comprehension tasks, including concept location and requirements traceability, identifier names need to be tokenised into their semantic constituents. In this paper we present an approach to the automated tokenisation of identifier names that improves on existing techniques in two ways. First, it improves the tokenisation accuracy for single-case identifier names and for identifier names containing digits, which existing techniques largely ignore. Second, performance gains over existing techniques are achieved using smaller oracles, making the approach easier to deploy.

View file

@ -1,6 +1,6 @@
@prefix oai: <http://www.openarchives.org/OAI/2.0/oai_dc/> . @prefix oai: <http://www.openarchives.org/OAI/2.0/oai_dc/> .
<id:oai:doaj-articles:8f128127aa68db508da01e428930bbcc> oai:OAI-PMH [ <id:oai%3Adoaj-articles%3A8f128127aa68db508da01e428930bbcc> oai:OAI-PMH [
oai:responseDate "2013-06-29T18:31:40Z"; oai:responseDate "2013-06-29T18:31:40Z";
oai:request "http://www.doaj.org/oai.article"; oai:request "http://www.doaj.org/oai.article";
oai:ListRecords [ oai:ListRecords [

View file

@ -2,3 +2,8 @@
tasks.withType(JavaCompile) { tasks.withType(JavaCompile) {
options.compilerArgs << "-Xlint:all" << "-profile" << "compact2" options.compilerArgs << "-Xlint:all" << "-profile" << "compact2"
} }
dependencies {
compile "org.xbib:net:${project.property('xbib-net.version')}"
testCompile "com.fasterxml.jackson.core:jackson-databind:${project.property('jackson.version')}"
}

View file

@ -1,18 +1,19 @@
package org.xbib.content.resource; package org.xbib.content.resource;
import org.xbib.content.resource.scheme.HttpScheme;
import org.xbib.content.resource.scheme.Scheme;
import org.xbib.content.resource.scheme.SchemeRegistry;
import org.xbib.content.resource.text.CharUtils; import org.xbib.content.resource.text.CharUtils;
import org.xbib.content.resource.text.CharUtils.Profile; import org.xbib.content.resource.text.CharUtils.Profile;
import org.xbib.content.resource.text.InvalidCharacterException; import org.xbib.content.resource.text.InvalidCharacterException;
import org.xbib.content.resource.url.UrlEncoding; import org.xbib.net.PercentDecoder;
import org.xbib.net.PercentEncoders;
import org.xbib.net.scheme.Scheme;
import org.xbib.net.scheme.SchemeRegistry;
import java.io.IOException; import java.io.IOException;
import java.net.IDN; import java.net.IDN;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -177,25 +178,6 @@ public class IRI implements Comparable<IRI>, Node {
return new IRI(schemeClass, scheme, authority, userinfo, host, port, path, query, fragment); return new IRI(schemeClass, scheme, authority, userinfo, host, port, path, query, fragment);
} }
/**
 * Normalizes an IRI.
 *
 * Opaque IRIs and IRIs without a path are returned unchanged. Otherwise the
 * scheme of the IRI, if known, is asked to normalize it. When there is no
 * scheme or the scheme returns {@code null}, a new IRI is built with a
 * normalized path and with query and fragment decoded and re-encoded.
 *
 * @param iri the IRI to normalize
 * @return the normalized IRI, or {@code null} if re-encoding fails with an
 *         {@link IOException} (the failure is logged at level FINE)
 */
public static IRI normalize(IRI iri) {
    boolean nothingToNormalize = iri.isOpaque() || iri.getPath() == null;
    if (nothingToNormalize) {
        return iri;
    }
    IRI normalizedByScheme = iri.schemeClass == null ? null : iri.schemeClass.normalize(iri);
    if (normalizedByScheme != null) {
        return normalizedByScheme;
    }
    try {
        return new IRI(iri.schemeClass,
                iri.getScheme(),
                iri.getAuthority(),
                iri.getUserInfo(),
                iri.getHost(),
                iri.getPort(),
                normalize(iri.getPath()),
                UrlEncoding.encode(UrlEncoding.decode(iri.getQuery()), Profile.IQUERY.filter()),
                UrlEncoding.encode(UrlEncoding.decode(iri.getFragment()), Profile.IFRAGMENT.filter()));
    } catch (IOException e) {
        logger.log(Level.FINE, e.getMessage(), e);
        return null;
    }
}
private static String normalize(String path) { private static String normalize(String path) {
if (path == null || path.length() == 0) { if (path == null || path.length() == 0) {
return "/"; return "/";
@ -211,13 +193,14 @@ public class IRI implements Comparable<IRI>, Node {
segments[n] = null; segments[n] = null;
} }
} }
PercentDecoder percentDecoder = new PercentDecoder();
for (String segment : segments) { for (String segment : segments) {
if (segment != null) { if (segment != null) {
if (buf.length() > 1) { if (buf.length() > 1) {
buf.append('/'); buf.append('/');
} }
try { try {
buf.append(UrlEncoding.encode(UrlEncoding.decode(segment), Profile.IPATHNODELIMS_SEG.filter())); buf.append(PercentEncoders.getMatrixEncoder(StandardCharsets.UTF_8).encode(percentDecoder.decode(segment)));
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
} }
@ -440,13 +423,13 @@ public class IRI implements Comparable<IRI>, Node {
if (authority != null && asciiAuthority == null) { if (authority != null && asciiAuthority == null) {
asciiAuthority = buildASCIIAuthority(); asciiAuthority = buildASCIIAuthority();
} }
return (asciiAuthority != null && asciiAuthority.length() > 0) ? asciiAuthority : null; return asciiAuthority != null && asciiAuthority.length() > 0 ? asciiAuthority : null;
} }
public String getASCIIFragment() { public String getASCIIFragment() {
if (fragment != null && asciiFragment == null) { if (fragment != null && asciiFragment == null) {
try { try {
asciiFragment = UrlEncoding.encode(fragment, Profile.FRAGMENT.filter()); asciiFragment = PercentEncoders.getFragmentEncoder(StandardCharsets.UTF_8).encode(fragment);
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
} }
@ -457,7 +440,7 @@ public class IRI implements Comparable<IRI>, Node {
public String getASCIIPath() { public String getASCIIPath() {
if (path != null && asciiPath == null) { if (path != null && asciiPath == null) {
try { try {
asciiPath = UrlEncoding.encode(path, Profile.PATH.filter()); asciiPath = PercentEncoders.getPathEncoder(StandardCharsets.UTF_8).encode(path);
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
} }
@ -468,7 +451,7 @@ public class IRI implements Comparable<IRI>, Node {
public String getASCIIQuery() { public String getASCIIQuery() {
if (query != null && asciiQuery == null) { if (query != null && asciiQuery == null) {
try { try {
asciiQuery = UrlEncoding.encode(query, Profile.QUERY.filter(), Profile.PATH.filter()); asciiQuery = PercentEncoders.getQueryEncoder(StandardCharsets.UTF_8).encode(query);
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
} }
@ -479,7 +462,7 @@ public class IRI implements Comparable<IRI>, Node {
public String getASCIIUserInfo() { public String getASCIIUserInfo() {
if (userinfo != null && asciiUserinfo == null) { if (userinfo != null && asciiUserinfo == null) {
try { try {
asciiUserinfo = UrlEncoding.encode(userinfo, Profile.USERINFO.filter()); asciiUserinfo = PercentEncoders.getUnreservedEncoder(StandardCharsets.UTF_8).encode(userinfo);
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
} }
@ -497,18 +480,9 @@ public class IRI implements Comparable<IRI>, Node {
} }
private String buildASCIIAuthority() { private String buildASCIIAuthority() {
if (schemeClass instanceof HttpScheme) { StringBuilder buf = new StringBuilder();
StringBuilder buf = new StringBuilder(); buildAuthority(buf, getASCIIUserInfo(), getASCIIHost(), getPort());
buildAuthority(buf, getASCIIUserInfo(), getASCIIHost(), getPort()); return buf.toString();
return buf.toString();
} else {
try {
return UrlEncoding.encode(authority, Profile.AUTHORITY.filter());
} catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e);
return null;
}
}
} }
public boolean isAbsolute() { public boolean isAbsolute() {
@ -538,10 +512,6 @@ public class IRI implements Comparable<IRI>, Node {
return resolve(this, new IRI(iri)); return resolve(this, new IRI(iri));
} }
/**
 * Normalizes this IRI; shorthand for {@link #normalize(IRI)} applied to this instance.
 *
 * @return whatever {@link #normalize(IRI)} returns for this IRI
 */
public IRI normalize() {
    return IRI.normalize(this);
}
@Override @Override
public String toString() { public String toString() {
StringBuilder buf = new StringBuilder(); StringBuilder buf = new StringBuilder();
@ -555,7 +525,7 @@ public class IRI implements Comparable<IRI>, Node {
public String toEncodedString() { public String toEncodedString() {
try { try {
return UrlEncoding.encode(toString(), Profile.SCHEMESPECIFICPART.filter()); return PercentEncoders.getUnreservedEncoder(StandardCharsets.UTF_8).encode(toString());
} catch (IOException e) { } catch (IOException e) {
logger.log(Level.FINE, e.getMessage(), e); logger.log(Level.FINE, e.getMessage(), e);
return null; return null;

View file

@ -1,43 +0,0 @@
package org.xbib.content.resource.scheme;
import org.xbib.content.resource.IRI;
/**
 * Skeleton for {@link Scheme} implementations: stores the scheme name and its
 * default port and leaves IRIs and paths untouched when asked to normalize.
 */
public abstract class AbstractScheme implements Scheme {

    protected final String name;

    protected final int port;

    /**
     * @param name the scheme name
     * @param port the default port of the scheme
     */
    protected AbstractScheme(String name, int port) {
        this.name = name;
        this.port = port;
    }

    @Override
    public String getName() {
        return this.name;
    }

    @Override
    public int getDefaultPort() {
        return this.port;
    }

    /**
     * No scheme-specific normalization: the IRI is handed back as is.
     */
    @Override
    public IRI normalize(IRI iri) {
        return iri;
    }

    /**
     * No scheme-specific normalization: the path is handed back as is.
     */
    @Override
    public String normalizePath(String path) {
        return path;
    }
}

View file

@ -1,12 +0,0 @@
package org.xbib.content.resource.scheme;
/**
 * Scheme with a name only: the default port is -1 and the normalization
 * behavior is inherited unchanged from {@link AbstractScheme}.
 */
public class DefaultScheme extends AbstractScheme {

    /** Port value passed to the superclass for every default scheme. */
    private static final int NO_DEFAULT_PORT = -1;

    /**
     * @param name the scheme name
     */
    public DefaultScheme(String name) {
        super(name, NO_DEFAULT_PORT);
    }
}

View file

@ -1,16 +0,0 @@
package org.xbib.content.resource.scheme;
/**
 * The "ftp" scheme with default port 21. Normalization is inherited from
 * {@link HttpScheme}.
 */
public class FtpScheme extends HttpScheme {

    static final String FTP_SCHEME_NAME = "ftp";

    private static final int FTP_DEFAULT_PORT = 21;

    public FtpScheme() {
        super(FTP_SCHEME_NAME, FTP_DEFAULT_PORT);
    }
}

View file

@ -1,56 +0,0 @@
package org.xbib.content.resource.scheme;
import org.xbib.content.resource.IRI;
import org.xbib.content.resource.text.CharUtils.Profile;
import org.xbib.content.resource.url.UrlEncoding;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * The "http" scheme with default port 80; also the base class for schemes
 * that share its normalization rules.
 */
public class HttpScheme extends AbstractScheme {

    static final String HTTP_SCHEME_NAME = "http";

    private static final Logger logger = Logger.getLogger(HttpScheme.class.getName());

    private static final int DEFAULT_PORT = 80;

    HttpScheme() {
        super(HTTP_SCHEME_NAME, DEFAULT_PORT);
    }

    HttpScheme(String name, int port) {
        super(name, port);
    }

    /**
     * Builds a normalized copy of the given IRI: the port is dropped (set to
     * -1) when it equals the default port of this scheme, the host is
     * lower-cased, and query and fragment are decoded and re-encoded. Scheme,
     * user info and path are taken over unchanged.
     *
     * @param iri the IRI to normalize
     * @return the normalized IRI, or {@code null} if re-encoding fails with an
     *         {@link IOException} (the failure is logged at level FINE)
     */
    @Override
    public IRI normalize(IRI iri) {
        int port = (iri.getPort() == getDefaultPort()) ? -1 : iri.getPort();
        String host = iri.getHost();
        if (host != null) {
            // Locale-independent lower-casing: with the default locale, a host such as
            // "EXAMPLE.INFO" would become "example.ınfo" on a Turkish system.
            host = host.toLowerCase(java.util.Locale.ROOT);
        }
        try {
            return IRI.builder()
                    .scheme(iri.getScheme())
                    .userinfo(iri.getUserInfo())
                    .host(host)
                    .port(port)
                    .path(iri.getPath())
                    .query(UrlEncoding.encode(UrlEncoding.decode(iri.getQuery()), Profile.IQUERY.filter()))
                    .fragment(UrlEncoding.encode(UrlEncoding.decode(iri.getFragment()), Profile.IFRAGMENT.filter()))
                    .build();
        } catch (IOException e) {
            logger.log(Level.FINE, e.getMessage(), e);
            return null;
        }
    }

    /**
     * Always returns {@code null}.
     * NOTE(review): the superclass returns the path unchanged; confirm that
     * returning null here is intended.
     */
    @Override
    public String normalizePath(String path) {
        return null;
    }
}

View file

@ -1,12 +0,0 @@
package org.xbib.content.resource.scheme;
/**
 * The "https" scheme with default port 443. Normalization is inherited from
 * {@link HttpScheme}.
 */
class HttpsScheme extends HttpScheme {

    static final String HTTPS_SCHEME_NAME = "https";

    private static final int HTTPS_DEFAULT_PORT = 443;

    public HttpsScheme() {
        super(HTTPS_SCHEME_NAME, HTTPS_DEFAULT_PORT);
    }
}

View file

@ -1,17 +0,0 @@
package org.xbib.content.resource.scheme;
import org.xbib.content.resource.IRI;
/**
 * Contract for IRI scheme providers: a scheme has a name and a default port
 * and can apply scheme-specific normalization to IRIs and paths.
 */
public interface Scheme {

    /**
     * @return the name of this scheme
     */
    String getName();

    /**
     * @return the default port of this scheme
     */
    int getDefaultPort();

    /**
     * Applies scheme-specific normalization to an IRI.
     *
     * @param iri the IRI to normalize
     * @return the result of the normalization
     */
    IRI normalize(IRI iri);

    /**
     * Applies scheme-specific normalization to a path.
     *
     * @param path the path to normalize
     * @return the result of the normalization
     */
    String normalizePath(String path);
}

View file

@ -1,59 +0,0 @@
package org.xbib.content.resource.scheme;
import java.util.HashMap;
import java.util.Map;
/**
 * Static registry of custom IRI schemes.
 *
 * The registry starts out with the http, https and ftp schemes. Scheme names
 * are matched case-insensitively; internally they are stored in lower case.
 */
public final class SchemeRegistry {

    // Created eagerly at class initialization, so getInstance() can never hand
    // out two different registries.
    private static final SchemeRegistry registry = new SchemeRegistry();

    private final Map<String, Scheme> schemes;

    SchemeRegistry() {
        schemes = new HashMap<>();
        schemes.put(HttpScheme.HTTP_SCHEME_NAME, new HttpScheme());
        schemes.put(HttpsScheme.HTTPS_SCHEME_NAME, new HttpsScheme());
        schemes.put(FtpScheme.FTP_SCHEME_NAME, new FtpScheme());
    }

    /**
     * @return the shared registry instance
     */
    public static SchemeRegistry getInstance() {
        return registry;
    }

    /**
     * Loads the named class with the context class loader of the current
     * thread, instantiates it and registers the instance.
     *
     * @param schemeClass fully qualified name of a {@link Scheme} implementation
     * @return true if the scheme was registered, false if its name was already taken
     * @throws ClassNotFoundException if the class cannot be loaded
     * @throws IllegalAccessException if the class or its no-arg constructor is not accessible
     * @throws InstantiationException if the class cannot be instantiated
     */
    @SuppressWarnings("unchecked")
    public boolean register(String schemeClass) throws ClassNotFoundException, IllegalAccessException,
            InstantiationException {
        Class<Scheme> klass = (Class<Scheme>) Thread.currentThread().getContextClassLoader().loadClass(schemeClass);
        return register(klass);
    }

    /**
     * Instantiates the given class through its no-arg constructor and
     * registers the instance.
     *
     * @param schemeClass the scheme implementation class
     * @return true if the scheme was registered, false if its name was already taken
     * @throws IllegalAccessException if the class or its no-arg constructor is not accessible
     * @throws InstantiationException if the class cannot be instantiated
     */
    public boolean register(Class<Scheme> schemeClass) throws IllegalAccessException,
            InstantiationException {
        Scheme scheme = schemeClass.newInstance();
        return register(scheme);
    }

    /**
     * Registers a scheme under its lower-cased name unless a scheme with that
     * name is already present.
     *
     * @param scheme the scheme to register
     * @return true if the scheme was registered, false if its name was already taken
     */
    public boolean register(Scheme scheme) {
        // Check and store under the same key. Checking the raw name while storing the
        // lower-cased one would let a mixed-case name overwrite an existing entry.
        String key = scheme.getName().toLowerCase(java.util.Locale.ROOT);
        if (schemes.get(key) != null) {
            return false;
        }
        schemes.put(key, scheme);
        return true;
    }

    /**
     * Looks up a scheme by name, ignoring case.
     *
     * @param scheme the scheme name, may be {@code null}
     * @return {@code null} for a {@code null} name, the registered scheme if
     *         there is one, otherwise a new {@link DefaultScheme} with the given name
     */
    public Scheme getScheme(String scheme) {
        if (scheme == null) {
            return null;
        }
        // Locale.ROOT keeps the lookup independent of the platform locale
        Scheme s = schemes.get(scheme.toLowerCase(java.util.Locale.ROOT));
        return (s != null) ? s : new DefaultScheme(scheme);
    }
}

View file

@ -1,4 +0,0 @@
/**
* Classes for resource schemes.
*/
package org.xbib.content.resource.scheme;

View file

@ -1,195 +0,0 @@
package org.xbib.content.resource.url;
import static java.nio.charset.CoderResult.OVERFLOW;
import static java.nio.charset.CoderResult.UNDERFLOW;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
/**
 * Decodes percent-encoded (%XX) Unicode text.
 *
 * An instance reuses its internal buffers and its {@link CharsetDecoder} across calls to
 * {@link #decode(CharSequence)}, so it must not be used by more than one thread at a time.
 */
public final class PercentDecoder {

    /**
     * Lower bound for the decoded char buffer. A supplementary code point decodes to a surrogate
     * pair (two chars); with a smaller buffer the decode loop could report overflow forever
     * without making progress.
     */
    private static final int MIN_DECODED_CHAR_BUF_SIZE = 2;

    /**
     * Written to with decoded chars by decoder.
     */
    private final CharBuffer decodedCharBuf;

    private final CharsetDecoder decoder;

    /**
     * The decoded string for the current input.
     */
    private final StringBuilder outputBuf = new StringBuilder();

    /**
     * Bytes represented by the current sequence of %-triples. Resized as needed.
     */
    private ByteBuffer encodedBuf;

    /**
     * Construct a new PercentDecoder with default buffer sizes.
     *
     * @param charsetDecoder Charset to decode bytes into chars with
     * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int)
     */
    public PercentDecoder(CharsetDecoder charsetDecoder) {
        this(charsetDecoder, 16, 16);
    }

    /**
     * @param charsetDecoder            Charset to decode bytes into chars with
     * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes; the buffer grows on demand
     * @param decodedCharBufSize        Size of buffer that encoded bytes are decoded into; values below 2 are
     *                                  raised to 2
     * @throws IllegalArgumentException if either size is negative
     */
    public PercentDecoder(CharsetDecoder charsetDecoder, int initialEncodedByteBufSize,
                          int decodedCharBufSize) {
        encodedBuf = ByteBuffer.allocate(initialEncodedByteBufSize);
        if (decodedCharBufSize < 0) {
            throw new IllegalArgumentException("decodedCharBufSize must not be negative: " + decodedCharBufSize);
        }
        decodedCharBuf = CharBuffer.allocate(Math.max(MIN_DECODED_CHAR_BUF_SIZE, decodedCharBufSize));
        decoder = charsetDecoder;
    }

    /**
     * @param input Input with %-encoded representation of characters in this instance's configured character set, e.g.
     *              "%20" for a space character
     * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters
     * @throws IllegalArgumentException     if a '%' is not followed by two hexadecimal digits
     * @throws MalformedInputException      if decoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String decode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        outputBuf.setLength(0);
        // this is almost always an underestimate of the size needed:
        // only a 4-byte encoding (which is 12 characters input) would cause this to be an overestimate
        outputBuf.ensureCapacity(input.length() / 8);
        encodedBuf.clear();
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c != '%') {
                // a literal char ends the current run of %-triples: decode what was collected so far
                handleEncodedBytes();
                outputBuf.append(c);
                continue;
            }
            if (i + 2 >= input.length()) {
                throw new IllegalArgumentException(
                        "Could not percent decode <" + input + ">: incomplete %-pair at position " + i);
            }
            // grow the byte buf if needed
            if (encodedBuf.remaining() == 0) {
                // at least 1, so that a zero-capacity initial buffer can still grow
                ByteBuffer largerBuf = ByteBuffer.allocate(Math.max(1, encodedBuf.capacity() * 2));
                encodedBuf.flip();
                largerBuf.put(encodedBuf);
                encodedBuf = largerBuf;
            }
            // note that we advance i here as we consume chars
            int msBits = Character.digit(input.charAt(++i), 16);
            int lsBits = Character.digit(input.charAt(++i), 16);
            if (msBits == -1 || lsBits == -1) {
                throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">");
            }
            msBits <<= 4;
            msBits |= lsBits;
            // msBits can only have 8 bits set, so cast is safe
            encodedBuf.put((byte) msBits);
        }
        handleEncodedBytes();
        return outputBuf.toString();
    }

    /**
     * Decode any buffered encoded bytes and write them to the output buf.
     */
    private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException {
        if (encodedBuf.position() == 0) {
            // nothing to do
            return;
        }
        decoder.reset();
        CoderResult coderResult;
        // switch to reading mode
        encodedBuf.flip();
        // loop while we're filling up the decoded char buf, or there's any encoded bytes
        // decode() in practice seems to only consume bytes when it can decode an entire char...
        do {
            decodedCharBuf.clear();
            coderResult = decoder.decode(encodedBuf, decodedCharBuf, false);
            throwIfError(coderResult);
            appendDecodedChars();
        } while (coderResult == OVERFLOW && encodedBuf.hasRemaining());
        // final decode with end-of-input flag
        decodedCharBuf.clear();
        coderResult = decoder.decode(encodedBuf, decodedCharBuf, true);
        throwIfError(coderResult);
        if (encodedBuf.hasRemaining()) {
            throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes");
        }
        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult);
        }
        appendDecodedChars();
        // we've finished the input, wrap it up
        encodedBuf.clear();
        flush();
    }

    /**
     * Must only be called when the input encoded bytes buffer is empty.
     */
    private void flush() throws MalformedInputException, UnmappableCharacterException {
        CoderResult coderResult;
        decodedCharBuf.clear();
        coderResult = decoder.flush(decodedCharBuf);
        appendDecodedChars();
        throwIfError(coderResult);
        if (coderResult != UNDERFLOW) {
            throw new IllegalStateException("Decoder flush resulted in " + coderResult);
        }
    }

    /**
     * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding
     * CharacterCodingException.
     *
     * @param coderResult result to check
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException {
        if (coderResult.isMalformed()) {
            throw new MalformedInputException(coderResult.length());
        }
        if (coderResult.isUnmappable()) {
            throw new UnmappableCharacterException(coderResult.length());
        }
    }

    /**
     * Flip the decoded char buf and append it to the string buf.
     */
    private void appendDecodedChars() {
        decodedCharBuf.flip();
        outputBuf.append(decodedCharBuf);
    }
}

View file

@ -1,187 +0,0 @@
package org.xbib.content.resource.url;
import static java.lang.Character.isHighSurrogate;
import static java.lang.Character.isLowSurrogate;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import java.util.BitSet;
/**
 * Percent-encodes text: every char outside a configured "safe" set is converted to bytes with a
 * {@link CharsetEncoder} and each byte is written as an upper-case %XX triple.
 *
 * This is typically done when encoding components of URLs. See {@link UrlPercentEncoders} for pre-configured
 * PercentEncoder instances.
 */
public final class PercentEncoder {

    private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray();

    private final BitSet safeChars;

    private final CharsetEncoder encoder;

    /**
     * Reused for {@link #encode(CharSequence)} so the common string case does not allocate a handler per call.
     */
    private final StringBuilderPercentEncoderOutputHandler stringHandler =
            new StringBuilderPercentEncoderOutputHandler();

    /**
     * Receives the encoder output for one unsafe char (or surrogate pair).
     */
    private final ByteBuffer encodedBytes;

    /**
     * Holds the one or two chars that are about to be encoded.
     */
    private final CharBuffer unsafeCharsToEncode;

    /**
     * @param safeChars      the set of chars to NOT encode, stored as a bitset with the int positions corresponding to
     *                       those chars set to true. Treated as read only.
     * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder
     *                       instances across threads.
     */
    public PercentEncoder(BitSet safeChars, CharsetEncoder charsetEncoder) {
        this.safeChars = safeChars;
        this.encoder = charsetEncoder;
        // maxBytesPerChar() is a float; truncate and add one to be on the safe side
        int bytesPerChar = (int) charsetEncoder.maxBytesPerChar() + 1;
        // a surrogate pair is two chars, so both buffers are sized for two chars at once
        this.unsafeCharsToEncode = CharBuffer.allocate(2);
        this.encodedBytes = ByteBuffer.allocate(2 * bytesPerChar);
    }

    /**
     * Translate a non-successful coder result into an exception.
     *
     * @param result result to check
     * @throws IllegalStateException        if result is overflow
     * @throws MalformedInputException      if result represents malformed input
     * @throws UnmappableCharacterException if result represents an unmappable character
     */
    private static void checkResult(CoderResult result) throws MalformedInputException, UnmappableCharacterException {
        if (result.isOverflow()) {
            throw new IllegalStateException("Byte buffer overflow; this should not happen.");
        } else if (result.isMalformed()) {
            throw new MalformedInputException(result.length());
        } else if (result.isUnmappable()) {
            throw new UnmappableCharacterException(result.length());
        }
    }

    /**
     * Fetch the low surrogate that must follow the high surrogate at {@code highIndex}.
     *
     * @param input     the text being encoded
     * @param highIndex position of a high surrogate in {@code input}
     * @return the low surrogate at {@code highIndex + 1}
     * @throws IllegalArgumentException if the input ends after the high surrogate or the next char is not a
     *                                  low surrogate
     */
    private static char lowSurrogateAfter(CharSequence input, int highIndex) {
        char high = input.charAt(highIndex);
        int lowIndex = highIndex + 1;
        if (lowIndex >= input.length()) {
            throw new IllegalArgumentException(
                    "Invalid UTF-16: The last character in the input string was a high surrogate (\\u"
                            + Integer.toHexString(high) + ")");
        }
        char low = input.charAt(lowIndex);
        if (!Character.isLowSurrogate(low)) {
            throw new IllegalArgumentException(
                    "Invalid UTF-16: Char " + highIndex + " is a high surrogate (\\u" + Integer.toHexString(high)
                            + "), but char " + lowIndex + " is not a low surrogate (\\u"
                            + Integer.toHexString(low) + ")");
        }
        return low;
    }

    /**
     * Encode the input and pass output chars to a handler.
     *
     * @param input   input string
     * @param handler handler to call on each output character
     * @throws IllegalArgumentException     if a high surrogate is not followed by a low surrogate
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public void encode(CharSequence input, StringBuilderPercentEncoderOutputHandler handler) throws
            MalformedInputException, UnmappableCharacterException {
        int index = 0;
        while (index < input.length()) {
            char current = input.charAt(index);
            if (safeChars.get(current)) {
                // safe chars pass through untouched
                handler.onOutputChar(current);
            } else {
                unsafeCharsToEncode.clear();
                unsafeCharsToEncode.append(current);
                if (Character.isHighSurrogate(current)) {
                    // encode both halves of the pair together and step over the low half
                    unsafeCharsToEncode.append(lowSurrogateAfter(input, index));
                    index++;
                }
                flushUnsafeCharBuffer(handler);
            }
            index++;
        }
    }

    /**
     * Encode the input and return the resulting text as a String.
     *
     * @param input input string
     * @return the input string with every character that's not in safeChars turned into its byte representation via the
     * instance's encoder and then percent-encoded
     * @throws MalformedInputException      if encoder is configured to report errors and malformed input is detected
     * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
     *                                      detected
     */
    public String encode(CharSequence input) throws MalformedInputException, UnmappableCharacterException {
        stringHandler.reset();
        stringHandler.ensureCapacity(input.length());
        encode(input, stringHandler);
        return stringHandler.getContents();
    }

    /**
     * Encode the pending chars to bytes with the charset encoder and emit each byte as a %XX triple.
     *
     * Side effects: unsafeCharsToEncode is read, encodedBytes is cleared and rewritten, the encoder is reset.
     */
    private void flushUnsafeCharBuffer(StringBuilderPercentEncoderOutputHandler handler) throws MalformedInputException,
            UnmappableCharacterException {
        // the char buffer was just written to; switch it to reading
        unsafeCharsToEncode.flip();
        encodedBytes.clear();
        encoder.reset();
        checkResult(encoder.encode(unsafeCharsToEncode, encodedBytes, true));
        checkResult(encoder.flush(encodedBytes));
        // now read back the bytes that were produced
        encodedBytes.flip();
        while (encodedBytes.hasRemaining()) {
            byte value = encodedBytes.get();
            char highNibble = HEX_CODE[(value & 0xF0) >>> 4];
            char lowNibble = HEX_CODE[value & 0x0F];
            handler.onOutputChar('%');
            handler.onOutputChar(highNibble);
            handler.onOutputChar(lowNibble);
        }
    }

    /**
     * Output handler that collects the encoded chars in a reusable StringBuilder.
     */
    private static class StringBuilderPercentEncoderOutputHandler {

        private final StringBuilder stringBuilder = new StringBuilder();

        StringBuilderPercentEncoderOutputHandler() {
        }

        String getContents() {
            return stringBuilder.toString();
        }

        void reset() {
            stringBuilder.setLength(0);
        }

        void ensureCapacity(int length) {
            stringBuilder.ensureCapacity(length);
        }

        void onOutputChar(char c) {
            stringBuilder.append(c);
        }
    }
}

View file

@ -1,472 +0,0 @@
package org.xbib.content.resource.url;
import static org.xbib.content.resource.url.UrlPercentEncoders.getFragmentEncoder;
import static org.xbib.content.resource.url.UrlPercentEncoders.getMatrixEncoder;
import static org.xbib.content.resource.url.UrlPercentEncoders.getPathEncoder;
import static org.xbib.content.resource.url.UrlPercentEncoders.getQueryParamEncoder;
import static org.xbib.content.resource.url.UrlPercentEncoders.getRegNameEncoder;
import static org.xbib.content.resource.url.UrlPercentEncoders.getUnstructuredQueryEncoder;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Builder for urls with url-encoding applied to path, query param, etc.
 *
 * Escaping rules are from RFC 3986, RFC 1738 and the HTML 4 spec (http://www.w3.org/TR/html401/interact/forms.html#form-content-type).
 * This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make
 * HTTP-useful URLs.
 *
 * A builder holds its own {@link PercentEncoder} instances and mutable state, so it must not be shared
 * between threads without external synchronization.
 */
public final class UrlBuilder {

    /**
     * Bracketed IPv6 literal. Accepts either the full form with eight 16-bit groups
     * ({@code [1:2:3:4:5:6:7:8]}) or a form compressed with {@code ::} ({@code [::1]}). The compressed
     * alternative is taken from
     * <a href="http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings">Stack Overflow</a>.
     * Without the full-form alternative such a literal would be treated as a reg-name and have its
     * brackets and colons percent-encoded.
     */
    private static final Pattern IPV6_PATTERN = Pattern.compile(
            "\\A\\[(?:(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}"
                    + "|((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?))]\\z");

    /**
     * IPv4 dotted quad.
     */
    private static final Pattern IPV4_PATTERN = Pattern
            .compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z");

    private final String scheme;

    private final String host;

    private final Integer port;

    private final List<Pair<String, String>> queryParams = new ArrayList<>();

    private final List<PathSegment> pathSegments = new ArrayList<>();

    private final PercentEncoder pathEncoder = getPathEncoder();

    private final PercentEncoder regNameEncoder = getRegNameEncoder();

    private final PercentEncoder matrixEncoder = getMatrixEncoder();

    private final PercentEncoder queryParamEncoder = getQueryParamEncoder();

    private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder();

    private final PercentEncoder fragmentEncoder = getFragmentEncoder();

    /**
     * If this is non-null, queryParams must be empty, and vice versa.
     */
    private String unstructuredQuery;

    private String fragment;

    private boolean forceTrailingSlash = false;

    /**
     * Create a URL with UTF-8 encoding.
     *
     * @param scheme scheme (e.g. http)
     * @param host   host (e.g. foo.com or 1.2.3.4 or [::1])
     * @param port   null or a positive integer
     */
    private UrlBuilder(String scheme, String host, Integer port) {
        this.host = host;
        this.scheme = scheme;
        this.port = port;
    }

    /**
     * Create a URL with a null port and UTF-8 encoding.
     *
     * @param scheme scheme (e.g. http)
     * @param host   host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
     *               ([::1]), excluding IPvFuture since no one uses that in practice
     * @return a url builder
     * @see UrlBuilder#forHost(String scheme, String host, int port)
     */
    public static UrlBuilder forHost(String scheme, String host) {
        return new UrlBuilder(scheme, host, null);
    }

    /**
     * @param scheme scheme (e.g. http)
     * @param host   host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
     *               ([::1]), excluding IPvFuture since no one uses that in practice
     * @param port   port
     * @return a url builder
     */
    public static UrlBuilder forHost(String scheme, String host, int port) {
        return new UrlBuilder(scheme, host, port);
    }

    /**
     * Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the
     * query string apply.
     *
     * @param url url to initialize builder with
     * @return a UrlBuilder containing the host, path, etc. from the url
     * @throws CharacterCodingException if char decoding fails
     * @see UrlBuilder#fromUrl(URL, CharsetDecoder)
     */
    public static UrlBuilder fromUrl(URL url) throws CharacterCodingException {
        return fromUrl(url, StandardCharsets.UTF_8.newDecoder());
    }

    /**
     * Create a UrlBuilder initialized with the contents of a {@link URL}.
     *
     * The query string will be parsed into HTML4 query params if it can be separated into a
     * <code>&amp;</code>-separated sequence of <code>key=value</code> pairs. The sequence of query params can then be
     * appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is
     * only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that
     * is in some other form. To represent this case, if the aforementioned param-parsing attempt fails, the query
     * string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link
     * UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls
     * to {@link UrlBuilder#unstructuredQuery(String)}, which replaces the entire query string, are allowed.
     *
     * @param url            url to initialize builder with
     * @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8)
     * @return a UrlBuilder containing the host, path, etc. from the url
     * @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to
     *                                  report errors
     * @see UrlBuilder#fromUrl(URL)
     */
    public static UrlBuilder fromUrl(URL url, CharsetDecoder charsetDecoder) throws
            CharacterCodingException {
        PercentDecoder decoder = new PercentDecoder(charsetDecoder);
        // reg names must be encoded UTF-8
        PercentDecoder regNameDecoder;
        if (charsetDecoder.charset().equals(StandardCharsets.UTF_8)) {
            regNameDecoder = decoder;
        } else {
            regNameDecoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder());
        }
        // URL.getPort() reports -1 when the url carries no explicit port
        Integer port = url.getPort();
        if (port == -1) {
            port = null;
        }
        UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port);
        buildFromPath(builder, decoder, url);
        buildFromQuery(builder, decoder, url);
        if (url.getRef() != null) {
            builder.fragment(decoder.decode(url.getRef()));
        }
        return builder;
    }

    /**
     * Populate a url builder based on the query of an URL.
     *
     * @param builder builder
     * @param decoder decoder
     * @param url     url
     * @throws CharacterCodingException if build fails
     */
    private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws
            CharacterCodingException {
        if (url.getQuery() != null) {
            String q = url.getQuery();
            List<Pair<String, String>> pairs = new ArrayList<>();
            boolean parseOk = true;
            for (String queryChunk : q.split("&")) {
                String[] queryParamChunks = queryChunk.split("=");
                // anything that is not exactly key=value makes the whole query unstructured
                if (queryParamChunks.length != 2) {
                    parseOk = false;
                    break;
                }
                pairs.add(Pair.of(decoder.decode(queryParamChunks[0]),
                        decoder.decode(queryParamChunks[1])));
            }
            if (parseOk) {
                for (Pair<String, String> pair : pairs) {
                    builder.queryParam(pair.getKey(), pair.getValue());
                }
            } else {
                builder.unstructuredQuery(decoder.decode(q));
            }
        }
    }

    /**
     * Populate the path segments of a url builder from an URL.
     *
     * @param builder builder
     * @param decoder decoder
     * @param url     url
     * @throws CharacterCodingException if build fails
     */
    private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws
            CharacterCodingException {
        for (String pathChunk : url.getPath().split("/")) {
            if ("".equals(pathChunk)) {
                continue;
            }
            if (pathChunk.charAt(0) == ';') {
                // matrix params without a segment name: attach them to an empty segment
                builder.pathSegment("");
                for (String matrixChunk : pathChunk.substring(1).split(";")) {
                    buildFromMatrixParamChunk(decoder, builder, matrixChunk);
                }
                continue;
            }
            String[] matrixChunks = pathChunk.split(";");
            builder.pathSegment(decoder.decode(matrixChunks[0]));
            for (int i = 1; i < matrixChunks.length; i++) {
                buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]);
            }
        }
    }

    /**
     * Decode one {@code name=value} matrix param chunk and add it to the builder's last path segment.
     *
     * @param decoder         decoder
     * @param ub              builder
     * @param pathMatrixChunk a single matrix param without the leading ';'
     * @throws CharacterCodingException if decoding fails
     * @throws IllegalArgumentException if the chunk is not of the form name=value
     */
    private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws
            CharacterCodingException {
        String[] mtxPair = pathMatrixChunk.split("=");
        if (mtxPair.length != 2) {
            throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">");
        }
        String mtxName = mtxPair[0];
        String mtxVal = mtxPair[1];
        ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal));
    }

    /**
     * Add a path segment.
     *
     * @param segment a path segment
     * @return this
     */
    public UrlBuilder pathSegment(String segment) {
        pathSegments.add(new PathSegment(segment));
        return this;
    }

    /**
     * Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}.
     *
     * @param segments path segments
     * @return this
     */
    public UrlBuilder pathSegments(String... segments) {
        for (String segment : segments) {
            pathSegment(segment);
        }
        return this;
    }

    /**
     * Add an HTML query parameter. Query parameters will be encoded in the order added.
     *
     * Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by
     * http://www.w3.org/TR/html401/interact/forms.html#form-content-type.
     *
     * If you use this method to build a query string, or created this builder from a url with a query string that can
     * successfully be parsed into query param pairs, you cannot subsequently use {@link
     * UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}.
     *
     * @param name  param name
     * @param value param value
     * @return this
     * @throws IllegalStateException if an unstructured query has already been set
     */
    public UrlBuilder queryParam(String name, String value) {
        if (unstructuredQuery != null) {
            throw new IllegalStateException(
                    "Cannot call queryParam() when this already has an unstructured query specified");
        }
        queryParams.add(Pair.of(name, value));
        return this;
    }

    /**
     * Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that
     * is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite
     * that query.
     *
     * If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you
     * cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL,
     * CharsetDecoder)}.
     *
     * @param query Complete URI query, as specified by https://tools.ietf.org/html/rfc3986#section-3.4
     * @return this
     * @throws IllegalStateException if query params have already been added
     */
    public UrlBuilder unstructuredQuery(String query) {
        if (!queryParams.isEmpty()) {
            throw new IllegalStateException(
                    "Cannot call unstructuredQuery() when this already has queryParam pairs specified");
        }
        unstructuredQuery = query;
        return this;
    }

    /**
     * Clear the unstructured query and any query params.
     *
     * Since the query / query param situation is a little complicated, this method will let you remove all query
     * information and start again from scratch. This may be useful when taking an existing url, parsing it into a
     * builder, and then re-doing its query params, for instance.
     *
     * @return this
     */
    public UrlBuilder clearQuery() {
        queryParams.clear();
        unstructuredQuery = null;
        return this;
    }

    /**
     * Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the
     * root. Matrix params will be encoded in the order added.
     *
     * @param name  param name
     * @param value param value
     * @return this
     */
    public UrlBuilder matrixParam(String name, String value) {
        if (pathSegments.isEmpty()) {
            // create an empty path segment to represent a matrix param applied to the root
            pathSegment("");
        }
        PathSegment seg = pathSegments.get(pathSegments.size() - 1);
        seg.matrixParams.add(Pair.of(name, value));
        return this;
    }

    /**
     * Set the fragment.
     *
     * @param fragment fragment string
     * @return this
     */
    public UrlBuilder fragment(String fragment) {
        this.fragment = fragment;
        return this;
    }

    /**
     * Force the generated URL to have a trailing slash at the end of the path.
     *
     * @return this
     */
    public UrlBuilder forceTrailingSlash() {
        forceTrailingSlash = true;
        return this;
    }

    /**
     * Encode the current builder state into a URL string.
     *
     * @return a well-formed URL string
     * @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
     */
    public String toUrlString() throws CharacterCodingException {
        StringBuilder buf = new StringBuilder();
        buf.append(scheme);
        buf.append("://");
        buf.append(encodeHost(host));
        if (port != null) {
            buf.append(':');
            buf.append(port);
        }
        for (PathSegment pathSegment : pathSegments) {
            buf.append('/');
            buf.append(pathEncoder.encode(pathSegment.segment));
            for (Pair<String, String> matrixParam : pathSegment.matrixParams) {
                buf.append(';');
                buf.append(matrixEncoder.encode(matrixParam.getKey()));
                buf.append('=');
                buf.append(matrixEncoder.encode(matrixParam.getValue()));
            }
        }
        if (forceTrailingSlash) {
            buf.append('/');
        }
        // structured query params and the unstructured query are mutually exclusive
        if (!queryParams.isEmpty()) {
            buf.append("?");
            Iterator<Pair<String, String>> qpIter = queryParams.iterator();
            while (qpIter.hasNext()) {
                Pair<String, String> queryParam = qpIter.next();
                buf.append(queryParamEncoder.encode(queryParam.getKey()));
                buf.append('=');
                buf.append(queryParamEncoder.encode(queryParam.getValue()));
                if (qpIter.hasNext()) {
                    buf.append('&');
                }
            }
        } else if (unstructuredQuery != null) {
            buf.append("?");
            buf.append(unstructuredQueryEncoder.encode(unstructuredQuery));
        }
        if (fragment != null) {
            buf.append('#');
            buf.append(fragmentEncoder.encode(fragment));
        }
        return buf.toString();
    }

    /**
     * @param host original host string
     * @return host encoded as in RFC 3986 section 3.2.2
     */
    private String encodeHost(String host) throws CharacterCodingException {
        // matching order: IP-literal, IPv4, reg-name; IP addresses are emitted verbatim
        if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) {
            return host;
        }
        // it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL)
        return regNameEncoder.encode(host);
    }

    /**
     * Bundle of a path segment name and any associated matrix params.
     */
    private static class PathSegment {

        private final String segment;

        private final List<Pair<String, String>> matrixParams = new ArrayList<>();

        PathSegment(String segment) {
            this.segment = segment;
        }
    }

    /**
     * Immutable key/value pair used for query and matrix params.
     */
    private static class Pair<K, V> {

        private final K key;

        private final V value;

        Pair(K key, V value) {
            this.key = key;
            this.value = value;
        }

        static <K, V> Pair<K, V> of(K key, V value) {
            return new Pair<>(key, value);
        }

        K getKey() {
            return key;
        }

        V getValue() {
            return value;
        }
    }
}

View file

@ -1,148 +0,0 @@
package org.xbib.content.resource.url;
import org.xbib.content.resource.text.CharUtils;
import org.xbib.content.resource.text.Filter;
import java.io.EOFException;
import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
/**
 * Performs URL percent encoding and decoding.
 */
public final class UrlEncoding {

    private static final char[] HEX = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    private UrlEncoding() {
    }

    /**
     * Append all given bytes as %XX triples.
     */
    private static void encode(Appendable sb, byte... bytes) throws IOException {
        encode(sb, 0, bytes.length, bytes);
    }

    /**
     * Append up to {@code length} bytes, starting at {@code offset}, as upper-case %XX triples.
     */
    private static void encode(Appendable sb, int offset, int length, byte... bytes) throws IOException {
        for (int n = offset, i = 0; n < bytes.length && i < length; n++, i++) {
            byte c = bytes[n];
            sb.append("%");
            sb.append(HEX[(c >> 4) & 0x0f]);
            sb.append(HEX[c & 0x0f]);
        }
    }

    /**
     * Percent-encode, using UTF-8, every char accepted by the filter.
     *
     * @param s      the text to encode, may be {@code null}
     * @param filter selects the chars that are percent-encoded
     * @return the encoded text, or {@code null} if {@code s} is {@code null}
     * @throws IOException if encoding fails
     */
    public static String encode(CharSequence s, Filter filter) throws IOException {
        return encode(s, new Filter[]{filter});
    }

    /**
     * Percent-encode, using UTF-8, every char accepted by at least one of the filters.
     *
     * @param s       the text to encode, may be {@code null}
     * @param filters select the chars that are percent-encoded
     * @return the encoded text, or {@code null} if {@code s} is {@code null}
     * @throws IOException if encoding fails
     */
    public static String encode(CharSequence s, Filter... filters) throws IOException {
        if (s == null) {
            return null;
        }
        return encode(s, "utf-8", filters);
    }

    /**
     * @return true if at least one filter accepts the code point
     */
    private static boolean check(int codepoint, Filter... filters) {
        for (Filter filter : filters) {
            if (filter.accept(codepoint)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Percent-encode every char accepted by at least one of the filters, converting chars to bytes
     * with the named charset. Chars that no filter accepts are copied unchanged.
     *
     * @param s       the text to encode, may be {@code null}
     * @param enc     name of the charset used to turn chars into bytes
     * @param filters select the chars that are percent-encoded
     * @return the encoded text, or {@code null} if {@code s} is {@code null}
     * @throws IOException if the charset name is not supported
     */
    public static String encode(CharSequence s, String enc, Filter... filters) throws IOException {
        if (s == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder();
        for (int n = 0; n < s.length(); n++) {
            char c = s.charAt(n);
            if (!CharUtils.isHighSurrogate(c) && check(c, filters)) {
                encode(sb, String.valueOf(c).getBytes(enc));
            } else if (CharUtils.isHighSurrogate(c)) {
                // a high surrogate is always handled together with the char that follows it
                if (check(c, filters)) {
                    String buf = String.valueOf(c) + s.charAt(++n);
                    byte[] b = buf.getBytes(enc);
                    encode(sb, b);
                } else {
                    sb.append(c);
                    sb.append(s.charAt(++n));
                }
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Replace every %XX triple in the input by the single char whose value is the decoded byte
     * (0..255). No charset decoding of multi-byte sequences is performed.
     *
     * @param string the text to decode
     * @return the decoded text
     * @throws IOException if reading fails
     */
    public static String decode(String string) throws IOException {
        String e = string;
        // the decoded text is never longer than the input
        char[] buf = new char[e.length()];
        try (DecodingReader r = new DecodingReader(new StringReader(e))) {
            int l = r.read(buf);
            e = new String(buf, 0, l);
        }
        return e;
    }

    /**
     * Reader that turns each %XX triple of the underlying reader into one char.
     */
    private static class DecodingReader extends FilterReader {

        DecodingReader(Reader in) {
            super(in);
        }

        @Override
        public int read() throws IOException {
            int c = super.read();
            if (c == '%') {
                int c1 = super.read();
                int c2 = super.read();
                // mask to 0..255: a plain byte-to-int widening would sign-extend values >= 0x80
                // and make %FF indistinguishable from the end-of-stream marker -1
                return decode((char) c1, (char) c2) & 0xff;
            } else {
                return c;
            }
        }

        /**
         * Fills the buffer with decoded chars. Note that this returns 0, not -1, when the
         * underlying reader is exhausted; {@link UrlEncoding#decode(String)} relies on that.
         */
        @Override
        public int read(char[] b, int off, int len) throws IOException {
            int n = off;
            int i;
            // check for free space first so that no char is consumed and dropped when the buffer is full
            while (n < off + len && (i = read()) != -1) {
                b[n++] = (char) i;
            }
            return n - off;
        }

        @Override
        public int read(char[] b) throws IOException {
            return read(b, 0, b.length);
        }

        @Override
        public long skip(long n) throws IOException {
            long i = 0;
            int c;
            for (; i < n; i++) {
                c = read();
                if (c == -1) {
                    throw new EOFException();
                }
            }
            return i;
        }

        /**
         * Convert one hex digit to its 4-bit value, shifted left by {@code shift} bits.
         */
        private static byte decode(char c, int shift) {
            return (byte) ((((c >= '0' && c <= '9') ? c - '0' : (c >= 'A' && c <= 'F') ? c - 'A' + 10
                    : (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1) & 0xf) << shift);
        }

        /**
         * Combine two hex digits into one byte.
         */
        private static byte decode(char c1, char c2) {
            return (byte) (decode(c1, 4) | decode(c2, 0));
        }
    }
}

View file

@ -1,166 +0,0 @@
package org.xbib.content.resource.url;
import static java.nio.charset.CodingErrorAction.REPLACE;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
/**
* See RFC 3986, RFC 1738 and <a href="http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding">Lunatech research</a>.
*/
public final class UrlPercentEncoders {
/**
* An encoder for RFC 3986 reg-names.
*/
private static final BitSet REG_NAME_BIT_SET = new BitSet();
private static final BitSet PATH_BIT_SET = new BitSet();
private static final BitSet MATRIX_BIT_SET = new BitSet();
private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet();
private static final BitSet QUERY_PARAM_BIT_SET = new BitSet();
private static final BitSet FRAGMENT_BIT_SET = new BitSet();
static {
// RFC 3986 'reg-name'. This is not very aggressive... it's quite possible to have DNS-illegal names out of this.
// Regardless, it will at least be URI-compliant even if it's not HTTP URL-compliant.
addUnreserved(REG_NAME_BIT_SET);
addSubdelims(REG_NAME_BIT_SET);
// Represents RFC 3986 'pchar'. Remove delimiter that starts matrix section.
addPChar(PATH_BIT_SET);
PATH_BIT_SET.clear((int) ';');
// Remove delims for HTTP matrix params as per RFC 1738 S3.3. The other reserved chars ('/' and '?')
// are already excluded.
addPChar(MATRIX_BIT_SET);
MATRIX_BIT_SET.clear((int) ';');
MATRIX_BIT_SET.clear((int) '=');
/*
* At this point it represents RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also
* specifies that "+" can mean space in a query, so we will make sure to say that '+' is not safe to leave as-is
*/
addQuery(UNSTRUCTURED_QUERY_BIT_SET);
UNSTRUCTURED_QUERY_BIT_SET.clear((int) '+');
/*
* Create more stringent requirements for HTML4 queries: remove delimiters for HTML query params so that key=value
* pairs can be used.
*/
QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET);
QUERY_PARAM_BIT_SET.clear((int) '=');
QUERY_PARAM_BIT_SET.clear((int) '&');
addFragment(FRAGMENT_BIT_SET);
}
private UrlPercentEncoders() {
}
public static PercentEncoder getRegNameEncoder() {
return new PercentEncoder(REG_NAME_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
.onUnmappableCharacter(REPLACE));
}
public static PercentEncoder getPathEncoder() {
return new PercentEncoder(PATH_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
.onUnmappableCharacter(REPLACE));
}
public static PercentEncoder getMatrixEncoder() {
return new PercentEncoder(MATRIX_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
.onUnmappableCharacter(REPLACE));
}
public static PercentEncoder getUnstructuredQueryEncoder() {
return new PercentEncoder(UNSTRUCTURED_QUERY_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
.onUnmappableCharacter(REPLACE));
}
public static PercentEncoder getQueryParamEncoder() {
return new PercentEncoder(QUERY_PARAM_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
.onUnmappableCharacter(REPLACE));
}
public static PercentEncoder getFragmentEncoder() {
return new PercentEncoder(FRAGMENT_BIT_SET, StandardCharsets.UTF_8.newEncoder().onMalformedInput(REPLACE)
.onUnmappableCharacter(REPLACE));
}
/**
* Add code points for 'fragment' chars.
*
* @param fragmentBitSet bit set
*/
private static void addFragment(BitSet fragmentBitSet) {
addPChar(fragmentBitSet);
fragmentBitSet.set((int) '/');
fragmentBitSet.set((int) '?');
}
/**
 * Marks every code point of the 'query' character class in the given bit set:
 * the 'pchar' set plus {@code '/'} and {@code '?'}.
 *
 * @param queryBitSet bit set to add the code points to
 */
private static void addQuery(BitSet queryBitSet) {
    addPChar(queryBitSet);
    for (char extra : new char[] {'/', '?'}) {
        queryBitSet.set(extra);
    }
}
/**
 * Marks every code point of the 'pchar' character class in the given bit set:
 * the 'unreserved' set, the 'sub-delims' set, {@code ':'} and {@code '@'}.
 *
 * @param bs bit set to add the code points to
 */
private static void addPChar(BitSet bs) {
    addUnreserved(bs);
    addSubdelims(bs);
    for (char extra : new char[] {':', '@'}) {
        bs.set(extra);
    }
}
/**
 * Marks every code point of the 'unreserved' character class in the given bit set:
 * ASCII letters, ASCII digits, and the marks {@code - . _ ~}.
 *
 * @param bs bit set to add the code points to
 */
private static void addUnreserved(BitSet bs) {
    // BitSet.set(from, to) treats the upper bound as exclusive, hence the "+ 1".
    bs.set('a', 'z' + 1);
    bs.set('A', 'Z' + 1);
    bs.set('0', '9' + 1);
    for (char mark : new char[] {'-', '.', '_', '~'}) {
        bs.set(mark);
    }
}
/**
 * Marks every code point of the 'sub-delims' character class in the given bit set:
 * {@code ! $ & ' ( ) * + , ; =}.
 *
 * @param bs bit set to add the code points to
 */
private static void addSubdelims(BitSet bs) {
    char[] subDelims = {'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='};
    for (char delim : subDelims) {
        bs.set(delim);
    }
}
}

View file

@ -1,4 +0,0 @@
/**
* Classes for URL encoding and decoding.
*/
package org.xbib.content.resource.url;

View file

@ -1,87 +0,0 @@
package org.xbib.content.resource.url;
import static org.junit.Assert.assertEquals;
import static java.nio.charset.CodingErrorAction.REPLACE;
import org.junit.Before;
import org.junit.Test;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnmappableCharacterException;
import java.util.BitSet;
/**
 * Tests for {@link PercentEncoder}, driven by a bit set of ASCII letters and digits
 * combined with a UTF-8 and a UTF-16BE charset encoder.
 */
public final class PercentEncoderTest {

    private PercentEncoder alnum;

    private PercentEncoder alnum16;

    /**
     * Builds the two encoders under test; both share one bit set of ASCII letters and digits.
     */
    @Before
    public void setUp() {
        BitSet alphanumerics = new BitSet();
        // BitSet.set(from, to) treats the upper bound as exclusive, hence the "+ 1".
        alphanumerics.set('a', 'z' + 1);
        alphanumerics.set('A', 'Z' + 1);
        alphanumerics.set('0', '9' + 1);
        this.alnum = new PercentEncoder(
                alphanumerics,
                StandardCharsets.UTF_8.newEncoder()
                        .onMalformedInput(REPLACE)
                        .onUnmappableCharacter(REPLACE));
        this.alnum16 = new PercentEncoder(
                alphanumerics,
                StandardCharsets.UTF_16BE.newEncoder()
                        .onMalformedInput(REPLACE)
                        .onUnmappableCharacter(REPLACE));
    }

    /** Characters present in the bit set pass through; everything else is percent-encoded. */
    @Test
    public void testDoesntEncodeSafe() throws CharacterCodingException {
        BitSet lowercaseOnly = new BitSet();
        lowercaseOnly.set('a', 'z' + 1);
        PercentEncoder encoder = new PercentEncoder(
                lowercaseOnly,
                StandardCharsets.UTF_8.newEncoder()
                        .onMalformedInput(REPLACE)
                        .onUnmappableCharacter(REPLACE));
        String encoded = encoder.encode("abcdABCD");
        assertEquals("abcd%41%42%43%44", encoded);
    }

    @Test
    public void testEncodeInBetweenSafe() throws MalformedInputException, UnmappableCharacterException {
        String encoded = alnum.encode("abc 123");
        assertEquals("abc%20123", encoded);
    }

    @Test
    public void testSafeInBetweenEncoded() throws MalformedInputException, UnmappableCharacterException {
        String encoded = alnum.encode(" abc ");
        assertEquals("%20abc%20", encoded);
    }

    @Test
    public void testEncodeUtf8() throws CharacterCodingException {
        // The Unicode snowman is a single UTF-16 char.
        String encoded = alnum.encode("snowman\u2603");
        assertEquals("snowman%E2%98%83", encoded);
    }

    @Test
    public void testEncodeUtf8SurrogatePair() throws CharacterCodingException {
        // Musical G clef (U+1D11E) must be written as a surrogate pair.
        String encoded = alnum.encode("clef\ud834\udd1e");
        assertEquals("clef%F0%9D%84%9E", encoded);
    }

    @Test
    public void testEncodeUtf16() throws CharacterCodingException {
        // The Unicode snowman is a single UTF-16 char.
        String encoded = alnum16.encode("snowman\u2603");
        assertEquals("snowman%26%03", encoded);
    }

    @Test
    public void testUrlEncodedUtf16SurrogatePair() throws CharacterCodingException {
        // Musical G clef (U+1D11E) must be written as a surrogate pair.
        String encoded = alnum16.encode("clef\ud834\udd1e");
        assertEquals("clef%D8%34%DD%1E", encoded);
    }
}

View file

@ -1,433 +0,0 @@
package org.xbib.content.resource.url;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.xbib.content.resource.url.UrlBuilder.forHost;
import static org.xbib.content.resource.url.UrlBuilder.fromUrl;
import org.junit.Test;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
/**
* Tests for {@code UrlBuilder}: building URL strings from parts via {@code forHost(...)} and
* round-tripping existing URLs via {@code fromUrl(...)}. Expected values are exact URL strings.
*/
public final class UrlBuilderTest {
/**
* Asserts that {@code actual} equals {@code expected}, and that parsing {@code actual} with
* {@link URI} and with {@link URL} and converting back to a string yields the same text.
*
* @param expected the expected URL string
* @param actual the URL string produced by the code under test
* @throws URISyntaxException if {@code actual} is rejected by {@link URI}
* @throws MalformedURLException if {@code actual} is rejected by {@link URL}
*/
private static void assertUrlEquals(String expected, String actual)
throws URISyntaxException, MalformedURLException {
assertEquals(expected, actual);
assertEquals(expected, new URI(actual).toString());
assertEquals(expected, new URL(actual).toString());
}
@Test
public void testNoUrlParts() throws Exception {
assertUrlEquals("http://foo.com", forHost("http", "foo.com").toUrlString());
}
@Test
public void testWithPort() throws Exception {
assertUrlEquals("http://foo.com:33", forHost("http", "foo.com", 33).toUrlString());
}
@Test
public void testSimplePath() throws Exception {
UrlBuilder ub = forHost("http", "foo.com");
ub.pathSegment("seg1").pathSegment("seg2");
assertUrlEquals("http://foo.com/seg1/seg2", ub.toUrlString());
}
@Test
public void testPathWithReserved() throws Exception {
// RFC 1738 S3.3
// Expected: '/', ';' and '?' are percent-encoded inside a path segment; '=' and '&' are emitted as-is.
UrlBuilder ub = forHost("http", "foo.com");
ub.pathSegment("seg/;?ment").pathSegment("seg=&2");
assertUrlEquals("http://foo.com/seg%2F%3B%3Fment/seg=&2", ub.toUrlString());
}
@Test
public void testPathSegments() throws Exception {
UrlBuilder ub = forHost("http", "foo.com");
ub.pathSegments("seg1", "seg2", "seg3");
assertUrlEquals("http://foo.com/seg1/seg2/seg3", ub.toUrlString());
}
@Test
public void testMatrixWithoutPathHasLeadingSlash() throws Exception {
UrlBuilder ub = forHost("http", "foo.com");
ub.matrixParam("foo", "bar");
assertUrlEquals("http://foo.com/;foo=bar", ub.toUrlString());
}
@Test
public void testMatrixWithReserved() throws Exception {
// Expected: ';', '=', '?', '#' and '/' are all percent-encoded inside a matrix parameter name.
UrlBuilder ub = forHost("http", "foo.com")
.pathSegment("foo")
.matrixParam("foo", "bar")
.matrixParam("res;=?#/erved", "value")
.pathSegment("baz");
assertUrlEquals("http://foo.com/foo;foo=bar;res%3B%3D%3F%23%2Ferved=value/baz", ub.toUrlString());
}
@Test
public void testUrlEncodedPathSegmentUtf8() throws Exception {
// 1 UTF-16 char
UrlBuilder ub = forHost("http", "foo.com");
ub.pathSegment("snowman").pathSegment("\u2603");
assertUrlEquals("http://foo.com/snowman/%E2%98%83", ub.toUrlString());
}
@Test
public void testUrlEncodedPathSegmentUtf8SurrogatePair() throws Exception {
UrlBuilder ub = forHost("http", "foo.com");
// musical G clef: 1d11e, has to be represented in surrogate pair form
ub.pathSegment("clef").pathSegment("\ud834\udd1e");
assertUrlEquals("http://foo.com/clef/%F0%9D%84%9E", ub.toUrlString());
}
@Test
public void testQueryParamNoPath() throws Exception {
UrlBuilder ub = forHost("http", "foo.com");
ub.queryParam("foo", "bar");
String s = ub.toUrlString();
assertUrlEquals("http://foo.com?foo=bar", s);
}
@Test
public void testQueryParamsDuplicated() throws Exception {
// Expected: repeated keys are all kept, in the order they were added.
UrlBuilder ub = forHost("http", "foo.com");
ub.queryParam("foo", "bar");
ub.queryParam("foo", "bar2");
ub.queryParam("baz", "quux");
ub.queryParam("baz", "quux2");
assertUrlEquals("http://foo.com?foo=bar&foo=bar2&baz=quux&baz=quux2", ub.toUrlString());
}
@Test
public void testEncodeQueryParams() throws Exception {
// Expected: '&', '=' and '#' are percent-encoded in a query value; '?' and '/' are emitted as-is.
UrlBuilder ub = forHost("http", "foo.com");
ub.queryParam("foo", "bar&=#baz");
ub.queryParam("foo", "bar?/2");
assertUrlEquals("http://foo.com?foo=bar%26%3D%23baz&foo=bar?/2", ub.toUrlString());
}
@Test
public void testEncodeQueryParamWithSpaceAndPlus() throws Exception {
// Expected: a space becomes %20 and '+' becomes %2B in both query keys and values.
UrlBuilder ub = forHost("http", "foo.com");
ub.queryParam("foo", "spa ce");
ub.queryParam("fo+o", "plus+");
assertUrlEquals("http://foo.com?foo=spa%20ce&fo%2Bo=plus%2B", ub.toUrlString());
}
@Test
public void testPlusInVariousParts() throws Exception {
// Expected: '+' is percent-encoded only in the query parameter; path, matrix and fragment keep it literal.
UrlBuilder ub = forHost("http", "foo.com");
ub.pathSegment("has+plus")
.matrixParam("plusMtx", "pl+us")
.queryParam("plusQp", "pl+us")
.fragment("plus+frag");
assertUrlEquals("http://foo.com/has+plus;plusMtx=pl+us?plusQp=pl%2Bus#plus+frag", ub.toUrlString());
}
@Test
public void testFragment() throws Exception {
// Expected: '#' inside the fragment is percent-encoded; '/' and '?' are emitted as-is.
UrlBuilder ub = forHost("http", "foo.com");
ub.queryParam("foo", "bar");
ub.fragment("#frag/?");
assertUrlEquals("http://foo.com?foo=bar#%23frag/?", ub.toUrlString());
}
@Test
public void testAllParts() throws Exception {
UrlBuilder ub = forHost("https", "foo.bar.com", 3333);
ub.pathSegment("foo");
ub.pathSegment("bar");
ub.matrixParam("mtx1", "val1");
ub.matrixParam("mtx2", "val2");
ub.queryParam("q1", "v1");
ub.queryParam("q2", "v2");
ub.fragment("zomg it's a fragment");
// NOTE(review): this compares with plain assertEquals, so the URI/URL re-parse check done by
// assertUrlEquals is skipped here - confirm whether that is intentional.
assertEquals("https://foo.bar.com:3333/foo/bar;mtx1=val1;mtx2=val2?q1=v1&q2=v2#zomg%20it's%20a%20fragment",
ub.toUrlString());
}
@Test
public void testIPv4Literal() throws Exception {
UrlBuilder ub = forHost("http", "127.0.0.1");
assertUrlEquals("http://127.0.0.1", ub.toUrlString());
}
@Test
public void testBadIPv4LiteralDoesntChoke() throws Exception {
// 300 is out of range for an IPv4 octet; the host is still expected to be emitted unchanged.
UrlBuilder ub = forHost("http", "300.100.50.1");
assertUrlEquals("http://300.100.50.1", ub.toUrlString());
}
@Test
public void testIPv6LiteralLocalhost() throws Exception {
UrlBuilder ub = forHost("http", "[::1]");
assertUrlEquals("http://[::1]", ub.toUrlString());
}
@Test
public void testIPv6Literal() throws Exception {
UrlBuilder ub = forHost("http", "[2001:db8:85a3::8a2e:370:7334]");
assertUrlEquals("http://[2001:db8:85a3::8a2e:370:7334]", ub.toUrlString());
}
@Test
public void testEncodedRegNameSingleByte() throws Exception {
// Expected: '?' is percent-encoded in the host name while ';' is emitted as-is.
UrlBuilder ub = forHost("http", "host?name;");
assertUrlEquals("http://host%3Fname;", ub.toUrlString());
}
@Test
public void testEncodedRegNameMultiByte() throws Exception {
UrlBuilder ub = forHost("http", "snow\u2603man");
assertUrlEquals("http://snow%E2%98%83man", ub.toUrlString());
}
@Test
public void testForceTrailingSlash() throws Exception {
UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c");
assertUrlEquals("https://foo.com/a/b/c/", ub.toUrlString());
}
@Test
public void testForceTrailingSlashWithQueryParams() throws Exception {
UrlBuilder ub =
forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c").queryParam("foo", "bar");
assertUrlEquals("https://foo.com/a/b/c/?foo=bar", ub.toUrlString());
}
@Test
public void testForceTrailingSlashNoPathSegmentsWithMatrixParams() throws Exception {
UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().matrixParam("m1", "v1");
assertUrlEquals("https://foo.com/;m1=v1/", ub.toUrlString());
}
@Test
public void testIntermingledMatrixParamsAndPathSegments() throws Exception {
// Expected: each matrix parameter is rendered on the path segment that was added just before it.
UrlBuilder ub = forHost("http", "foo.com")
.pathSegments("seg1", "seg2")
.matrixParam("m1", "v1")
.pathSegment("seg3")
.matrixParam("m2", "v2");
assertUrlEquals("http://foo.com/seg1/seg2;m1=v1/seg3;m2=v2", ub.toUrlString());
}
@Test
public void testFromUrlWithEverything() throws Exception {
String orig =
"https://foo.bar.com:33/foo/ba%20r;mtx1=val1;mtx2=val%202/seg%203;m2=v2?q1=v1&q2=v%202#zomg%20it's%20a%20fragm";
assertUrlBuilderRoundtrip(orig);
}
@Test
public void testFromUrlWithEmptyPath() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com");
}
@Test
public void testFromUrlWithEmptyPathAndSlash() throws Exception {
// Expected: the lone trailing slash is dropped on the round trip.
assertUrlBuilderRoundtrip("http://foo.com/", "http://foo.com");
}
@Test
public void testFromUrlWithPort() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com:1234");
}
@Test
public void testFromUrlWithEmptyPathSegent() throws Exception {
// Expected: the empty trailing path segments are dropped on the round trip.
assertUrlBuilderRoundtrip("http://foo.com/foo//", "http://foo.com/foo");
}
@Test
public void testFromUrlWithEncodedHost() throws Exception {
assertUrlBuilderRoundtrip("http://f%20oo.com/bar");
}
@Test
public void testFromUrlWithEncodedPathSegment() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo/b%20ar");
}
@Test
public void testFromUrlWithEncodedMatrixParam() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo;m1=v1;m%202=v%202");
}
@Test
public void testFromUrlWithEncodedQueryParam() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo?q%201=v%202&q2=v2");
}
@Test
public void testFromUrlWithEncodedQueryParamDelimiter() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=%3Dv1&%26q2=v2");
}
@Test
public void testFromUrlWithEncodedFragment() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo#b%20ar");
}
@Test
public void testFromUrlWithMalformedMatrixPair() throws Exception {
// A matrix pair with two '=' signs must be rejected with the exact message below.
try {
fromUrl(new URL("http://foo.com/foo;m1=v1=v2"));
fail();
} catch (IllegalArgumentException e) {
assertEquals("Malformed matrix param: <m1=v1=v2>", e.getMessage());
}
}
@Test
public void testFromUrlWithEmptyPathSegmentWithMatrixParams() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo/;m1=v1");
}
@Test
public void testFromUrlWithEmptyPathWithMatrixParams() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/;m1=v1");
}
@Test
public void testFromUrlWithEmptyPathWithMultipleMatrixParams() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/;m1=v1;m2=v2");
}
@Test
public void testFromUrlWithPathSegmentEndingWithSemicolon() throws Exception {
// Expected: the dangling ';' with no matrix parameter after it is dropped on the round trip.
assertUrlBuilderRoundtrip("http://foo.com/foo;", "http://foo.com/foo");
}
@Test
public void testPercentDecodeInvalidPair() throws MalformedURLException, CharacterCodingException {
// "%2o" is not a valid hex pair and must be rejected with the exact message below.
try {
fromUrl(new URL("http://foo.com/fo%2o"));
fail();
} catch (IllegalArgumentException e) {
assertEquals("Invalid %-tuple <%2o>", e.getMessage());
}
}
@Test
public void testFromUrlMalformedQueryParamMultiValues() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1=v2");
}
@Test
public void testFromUrlMalformedQueryParamNoValue() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1&q2");
}
@Test
public void testFromUrlUnstructuredQueryWithEscapedChars() throws Exception {
assertUrlBuilderRoundtrip("http://foo.com/foo?query==&%23");
}
@Test
public void testCantUseQueryParamAfterQuery() {
// queryParam() after unstructuredQuery() must fail with the exact message below.
UrlBuilder ub = forHost("http", "foo.com").unstructuredQuery("q");
try {
ub.queryParam("foo", "bar");
fail();
} catch (IllegalStateException e) {
assertEquals("Cannot call queryParam() when this already has an unstructured query specified",
e.getMessage());
}
}
@Test
public void testCantUseQueryAfterQueryParam() {
// unstructuredQuery() after queryParam() must fail with the exact message below.
UrlBuilder ub = forHost("http", "foo.com").queryParam("foo", "bar");
try {
ub.unstructuredQuery("q");
fail();
} catch (IllegalStateException e) {
assertEquals("Cannot call unstructuredQuery() when this already has queryParam pairs specified",
e.getMessage());
}
}
@Test
public void testUnstructuredQueryWithNoSpecialChars() throws Exception {
assertUrlEquals("http://foo.com?q", forHost("http", "foo.com").unstructuredQuery("q").toUrlString());
}
@Test
public void testUnstructuredQueryWithOkSpecialChars() throws Exception {
assertUrlEquals("http://foo.com?q?/&=", forHost("http", "foo.com").unstructuredQuery("q?/&=").toUrlString());
}
@Test
public void testUnstructuredQueryWithEscapedSpecialChars() throws Exception {
// Expected: '#' and '+' are percent-encoded in an unstructured query.
assertUrlEquals("http://foo.com?q%23%2B", forHost("http", "foo.com").unstructuredQuery("q#+").toUrlString());
}
@Test
public void testClearQueryRemovesQueryParam() throws Exception {
UrlBuilder ub = forHost("http", "host")
.queryParam("foo", "bar")
.clearQuery();
assertUrlEquals("http://host", ub.toUrlString());
}
@Test
public void testClearQueryRemovesUnstructuredQuery() throws Exception {
UrlBuilder ub = forHost("http", "host")
.unstructuredQuery("foobar")
.clearQuery();
assertUrlEquals("http://host", ub.toUrlString());
}
@Test
public void testClearQueryAfterQueryParamAllowsQuery() throws Exception {
UrlBuilder ub = forHost("http", "host")
.queryParam("foo", "bar")
.clearQuery()
.unstructuredQuery("foobar");
assertUrlEquals("http://host?foobar", ub.toUrlString());
}
@Test
public void testClearQueryAfterQueryAllowsQueryParam() throws Exception {
UrlBuilder ub = forHost("http", "host")
.unstructuredQuery("foobar")
.clearQuery()
.queryParam("foo", "bar");
assertUrlEquals("http://host?foo=bar", ub.toUrlString());
}
/**
* Asserts that a URL string is unchanged after going through {@code fromUrl(...)} and
* {@code toUrlString()}.
*
* @param url the URL string used both as input and as the expected output
*/
private void assertUrlBuilderRoundtrip(String url)
throws MalformedURLException, CharacterCodingException, URISyntaxException {
assertUrlBuilderRoundtrip(url, url);
}
/**
* Parses {@code origUrl} into a {@link URL}, feeds it through {@code fromUrl(...)} and
* {@code toUrlString()}, and checks the result against {@code finalUrl} with
* {@code assertUrlEquals}.
*
* @param origUrl the url that will be used to create a URL
* @param finalUrl the URL string it should end up as
*/
private void assertUrlBuilderRoundtrip(String origUrl, String finalUrl)
throws MalformedURLException, CharacterCodingException, URISyntaxException {
assertUrlEquals(finalUrl, fromUrl(new URL(origUrl)).toUrlString());
}
}

View file

@ -1,4 +0,0 @@
/**
* Classes for testing URL processing.
*/
package org.xbib.content.resource.url;

View file

@ -1,4 +1,4 @@
dependencies { dependencies {
compile project(':content-core') compile project(':content-core')
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-smile:${versions.jackson}" compile "com.fasterxml.jackson.dataformat:jackson-dataformat-smile:${project.property('jackson.version')}"
} }

View file

@ -1,10 +1,9 @@
dependencies { dependencies {
compile project(':content-core') compile project(':content-core')
compile project(':content-resource') compile project(':content-resource')
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-xml:${versions.jackson}" compile "com.fasterxml.jackson.dataformat:jackson-dataformat-xml:${project.property('jackson.version')}"
} }
tasks.withType(JavaCompile) { tasks.withType(JavaCompile) {
options.compilerArgs << "-Xlint:all" << "-profile" << "compact2" options.compilerArgs << "-Xlint:all" << "-profile" << "compact2"
} }

View file

@ -206,12 +206,12 @@ public class XmlXContentGenerator extends AbstractXContentGenerator {
@Override @Override
public void writeString(String text) throws IOException { public void writeString(String text) throws IOException {
generator.writeString(XMLUtil.sanitizeXml10(text)); generator.writeString(XMLUtil.sanitize(text));
} }
@Override @Override
public void writeString(char[] text, int offset, int len) throws IOException { public void writeString(char[] text, int offset, int len) throws IOException {
generator.writeString(XMLUtil.sanitizeXml10(text, offset, len)); generator.writeString(XMLUtil.sanitize(new String(text, offset, len)));
} }
@Override @Override

View file

@ -265,10 +265,24 @@ public final class XMLUtil {
return sb.toString(); return sb.toString();
} }
/**
 * Removes every character that is not kept by the filter below and returns the remainder.
 *
 * <p>Kept: tab, line feed, U+0020..U+D7FF, U+E000..U+FFFD, and well-formed surrogate pairs
 * (supplementary code points U+10000..U+10FFFF, which are legal XML 1.0 characters).
 * Dropped: all other control characters, including carriage return, so {@code "\r\n"}
 * collapses to {@code "\n"}; lone surrogates; U+FFFE and U+FFFF.
 *
 * <p>Earlier behavior filtered per {@code char} and therefore stripped every supplementary
 * character; valid surrogate pairs are now copied through unchanged.
 *
 * @param string the character sequence to sanitize, must not be {@code null}
 * @return the sanitized string, possibly empty
 */
public static String sanitizeToLineFeed(CharSequence string) {
    final int len = string.length();
    StringBuilder sb = new StringBuilder(len);
    for (int i = 0; i < len; i++) {
        char c = string.charAt(i);
        // A high surrogate directly followed by a low surrogate is one valid supplementary code point.
        if (Character.isHighSurrogate(c) && i + 1 < len) {
            char next = string.charAt(i + 1);
            if (Character.isLowSurrogate(next)) {
                sb.append(c).append(next);
                i++;
                continue;
            }
        }
        boolean legal = c == '\t' || c == '\n'
                || (c >= '\u0020' && c <= '\uD7FF')
                || (c >= '\uE000' && c <= '\uFFFD');
        if (legal) {
            sb.append(c);
        }
    }
    return sb.toString();
}
/** /**
* Does not work. * The pattern matching does not work.
* *
* @param sequence the charatcer sequence * @param sequence the character sequence
* @return sanitized string * @return sanitized string
*/ */
public static String sanitizeXml10(CharSequence sequence) { public static String sanitizeXml10(CharSequence sequence) {

View file

@ -189,7 +189,7 @@ public class XContentXmlBuilderTest extends Assert {
QName root = new QName("root"); QName root = new QName("root");
XContentBuilder builder = XmlXContent.contentBuilder(new XmlXParams(root)); XContentBuilder builder = XmlXContent.contentBuilder(new XmlXParams(root));
builder.startObject().field("Hello", "World\u001b").endObject(); builder.startObject().field("Hello", "World\u001b").endObject();
assertEquals("<root><Hello>World\ufffd</Hello></root>", builder.string()); assertEquals("<root><Hello>World</Hello></root>", builder.string());
} }
@Test @Test

View file

@ -1,4 +1,4 @@
dependencies { dependencies {
compile project(':content-core') compile project(':content-core')
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${versions.jackson}" compile "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${project.property('jackson.version')}"
} }

View file

@ -1,3 +1,6 @@
group = org.xbib group = org.xbib
name = content name = content
version = 1.0.7 version = 1.1.0
jackson.version = 2.8.4
xbib-net.version = 1.0.0

View file

@ -6,7 +6,7 @@ task xbibUpload(type: Upload, dependsOn: build) {
if (project.hasProperty('xbibUsername')) { if (project.hasProperty('xbibUsername')) {
mavenDeployer { mavenDeployer {
configuration = configurations.wagon configuration = configurations.wagon
repository(url: uri('scpexe://xbib.org/repository')) { repository(url: uri('sftp://xbib.org/repository')) {
authentication(userName: xbibUsername, privateKey: xbibPrivateKey) authentication(userName: xbibUsername, privateKey: xbibPrivateKey)
} }
} }
@ -64,3 +64,7 @@ task sonatypeUpload(type: Upload, dependsOn: build) {
} }
} }
} }
nexusStaging {
packageGroup = "org.xbib"
}

View file

@ -1,8 +1,8 @@
tasks.withType(FindBugs) { tasks.withType(FindBugs) {
ignoreFailures = true ignoreFailures = true
reports { reports {
xml.enabled = true xml.enabled = false
html.enabled = false html.enabled = true
} }
} }
tasks.withType(Pmd) { tasks.withType(Pmd) {
@ -22,10 +22,8 @@ tasks.withType(Checkstyle) {
jacocoTestReport { jacocoTestReport {
reports { reports {
xml.enabled true xml.enabled = true
csv.enabled false csv.enabled = false
xml.destination "${buildDir}/reports/jacoco-xml"
html.destination "${buildDir}/reports/jacoco-html"
} }
} }

Binary file not shown.

View file

@ -1,6 +1,6 @@
#Sat Dec 03 23:47:13 CET 2016 #Mon Aug 14 19:27:00 CEST 2017
distributionBase=GRADLE_USER_HOME distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip

6
gradlew vendored
View file

@ -33,11 +33,11 @@ DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value. # Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum" MAX_FD="maximum"
warn ( ) { warn () {
echo "$*" echo "$*"
} }
die ( ) { die () {
echo echo
echo "$*" echo "$*"
echo echo
@ -155,7 +155,7 @@ if $cygwin ; then
fi fi
# Escape application args # Escape application args
save ( ) { save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " " echo " "
} }