This commit is contained in:
Jörg Prante 2022-10-21 23:59:17 +02:00
parent da6f3145be
commit 7ef0e83364
14 changed files with 2294 additions and 0 deletions

View file

@ -19,3 +19,9 @@ The org.xbib.net.buffer "DataBuffer" classes are taken from Spring Framework, Co
https://github.com/spring-projects/spring-framework/tree/main/spring-core/src/main/java/org/springframework/core/io/buffer https://github.com/spring-projects/spring-framework/tree/main/spring-core/src/main/java/org/springframework/core/io/buffer
License: Apache 2.0 License: Apache 2.0
The IRI class is a modified version taken from org.apache.abdera.i18n.text
https://abdera.apache.org
License: Apache 2.0

View file

@ -0,0 +1,803 @@
package org.xbib.net;
import java.io.IOException;
import java.net.IDN;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.xbib.net.scheme.Scheme;
import org.xbib.net.scheme.SchemeRegistry;
import org.xbib.net.util.CharUtils;
import org.xbib.net.util.InvalidCharacterException;
import org.xbib.net.util.Profile;
public class IRI implements Comparable<IRI> {
private static final Pattern IRIPATTERN =
Pattern.compile("^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\\?([^#]*))?(?:#(.*))?");
private final IRIBuilder builder;
IRI(IRIBuilder builder) {
this.builder = builder;
}
public static IRIBuilder builder() {
return new IRIBuilder();
}
public static IRI create(String iri) {
return IRI.builder().from(iri).build();
}
public String getScheme() {
return builder.scheme;
}
public String getAuthority() {
return (builder.authority != null && builder.authority.length() > 0) ? builder.authority : null;
}
public String getFragment() {
return builder.fragment;
}
public String getHost() {
return (builder.host != null && builder.host.length() > 0) ? builder.host : null;
}
public String getPath() {
return builder.path;
}
public int getPort() {
return builder.port;
}
public String getQuery() {
return builder.query;
}
public String getSchemeSpecificPart() {
return builder.schemeSpecificPart;
}
public String getUserInfo() {
return builder.userinfo;
}
public boolean isAbsolute() {
return builder.scheme != null;
}
public boolean isOpaque() {
return builder.path == null;
}
public boolean isPathAbsolute() {
String s = getPath();
return s != null && s.length() > 0 && s.charAt(0) == '/';
}
public boolean isSameDocumentReference() {
return builder.scheme == null && builder.authority == null
&& (builder.path == null || builder.path.length() == 0 || ".".equals(builder.path))
&& builder.query == null;
}
public String getASCIIHost() {
return builder.getASCIIHost();
}
public String getASCIIAuthority() {
return builder.getASCIIAuthority();
}
public String getASCIIFragment() {
return builder.getASCIIFragment();
}
public String getASCIIPath() {
return builder.getASCIIPath();
}
public String getASCIIQuery() {
return builder.getASCIIQuery();
}
public String getASCIIUserInfo() {
return builder.getASCIIUserInfo();
}
public String getASCIISchemeSpecificPart() {
return builder.getASCIISchemeSpecificPart();
}
public IRI resolve(IRI iri) {
return resolve(this, iri);
}
public IRI resolve(String iri) {
return resolve(this, IRI.builder().from(iri).build());
}
/**
 * Resolves the reference {@code c} against the base {@code b}.
 * The checks are applied in this order:
 * <ol>
 * <li>{@code c == null}: returns {@code null}.</li>
 * <li>{@code c} renders as {@code ""}, {@code "#"}, {@code "."} or {@code "./"}: returns {@code b}.</li>
 * <li>{@code b == null}: returns {@code c}.</li>
 * <li>either IRI is opaque (has no path): returns {@code c}.</li>
 * <li>{@code c} is a same-document reference: returns {@code b} when the fragments agree,
 * otherwise a copy of {@code b} with a normalized path and the fragment of {@code c}.</li>
 * <li>{@code c} is absolute: returns {@code c}.</li>
 * <li>otherwise a new IRI is assembled from the scheme of {@code b}, the query and fragment
 * of {@code c}, and either the authority of {@code c} or, if it has none, the authority of {@code b}.</li>
 * </ol>
 * @param b the base IRI, may be {@code null}
 * @param c the reference to resolve, may be {@code null}
 * @return the resolved IRI, or {@code null} if {@code c} is {@code null}
 */
public static IRI resolve(IRI b, IRI c) {
if (c == null) {
return null;
}
// An empty reference, a bare "#", "." and "./" are all treated as "the base itself".
if ("".equals(c.toString()) || "#".equals(c.toString())
|| ".".equals(c.toString())
|| "./".equals(c.toString())) {
return b;
}
if (b == null) {
return c;
}
// Opaque IRIs (no path) cannot take part in hierarchical resolution.
if (c.isOpaque() || b.isOpaque()) {
return c;
}
if (c.isSameDocumentReference()) {
String cfragment = c.getFragment();
String bfragment = b.getFragment();
// Same fragment (or no fragment on both sides): the base already is the answer.
if ((cfragment == null && bfragment == null) || (cfragment != null && cfragment.equals(bfragment))) {
return b;
} else {
// Keep every component of the base except the fragment, which comes from the reference.
return IRI.builder()
.scheme(b.builder.scheme)
.authority(b.builder.authority)
.userinfo(b.builder.userinfo)
.host(b.builder.host)
.port(b.builder.port)
.path(normalizePath(b.builder.path))
.query(b.builder.query)
.fragment(cfragment)
.build();
}
}
if (c.isAbsolute()) {
return c;
}
// Relative reference: the scheme always comes from the base, query and fragment from the reference.
String scheme = b.builder.scheme;
String query = c.getQuery();
String fragment = c.getFragment();
String userinfo;
String authority;
String host;
int port;
String path;
if (c.getAuthority() == null) {
// No authority in the reference: inherit it from the base and merge the paths,
// unless the reference path is absolute, in which case it replaces the base path.
authority = b.getAuthority();
userinfo = b.getUserInfo();
host = b.getHost();
port = b.getPort();
path = c.isPathAbsolute() ? normalizePath(c.getPath()) : resolve(b.getPath(), c.getPath());
} else {
// Network-path reference: authority and path both come from the reference.
authority = c.getAuthority();
userinfo = c.getUserInfo();
host = c.getHost();
port = c.getPort();
path = normalizePath(c.getPath());
}
return IRI.builder()
.scheme(scheme)
.authority(authority)
.userinfo(userinfo)
.host(host)
.port(port)
.path(path)
.query(query)
.fragment(fragment)
.build();
}
/**
 * Expresses {@code c} relative to the base {@code b} where possible.
 * {@code c} is returned unchanged when either IRI is opaque, when the schemes differ
 * (compared ignoring case, a missing scheme only matches a missing scheme), or when the
 * normalized path of {@code c} does not lie below the normalized path of {@code b}.
 * Otherwise the result has no scheme and no authority, carries the remaining path and
 * the query and fragment of {@code c}.
 * NOTE(review): neither argument is null-checked, and the authorities of the two IRIs
 * are not compared - confirm that callers only pass IRIs of the same authority.
 * @param b the base IRI
 * @param c the IRI to relativize
 * @return the relative IRI, or {@code c} itself if it cannot be relativized
 */
public static IRI relativize(IRI b, IRI c) {
if (c.isOpaque() || b.isOpaque()) {
return c;
}
// Give up unless both schemes are absent or both are present and equal ignoring case.
if ((b.builder.scheme == null && c.builder.scheme != null) || (b.builder.scheme != null && c.builder.scheme == null)
|| (b.builder.scheme != null && !b.builder.scheme.equalsIgnoreCase(c.builder.scheme))) {
return c;
}
String bpath = normalizePath(b.getPath());
String cpath = normalizePath(c.getPath());
if (!bpath.equals(cpath)) {
// Compare on a segment boundary so that "/a" is not taken as a prefix of "/ab".
if (bpath.charAt(bpath.length() - 1) != '/') {
bpath += "/";
}
if (!cpath.startsWith(bpath)) {
return c;
}
}
// When both paths are equal the remainder is empty, which normalizePath turns into "/".
return IRI.builder()
.scheme(null)
.authority(null)
.userinfo(null)
.host(null)
.port(-1)
.path(normalizePath(cpath.substring(bpath.length())))
.query(c.getQuery())
.fragment(c.getFragment())
.build();
}
/**
 * Normalizes a path: a {@code null} or empty path becomes {@code "/"}, "." segments are
 * removed and every remaining segment is percent-decoded and then encoded again with the
 * matrix encoder. The result always starts with '/'.
 * A path that splits into fewer than two segments is returned unchanged.
 * NOTE(review): ".." segments are kept as they are; only "." is removed here - confirm
 * whether parent-directory segments are meant to be collapsed as well.
 * @param path the path to normalize, may be {@code null}
 * @return the normalized path
 */
private static String normalizePath(String path) {
if (path == null || path.length() == 0) {
return "/";
}
// String.split drops trailing empty strings, so a trailing '/' is restored at the end.
String[] segments = path.split("/");
if (segments.length < 2) {
return path;
}
StringBuilder buf = new StringBuilder("/");
// First pass: mark "." segments for removal.
for (int n = 0; n < segments.length; n++) {
String segment = segments[n].intern();
if (".".equals(segment)) {
segments[n] = null;
}
}
PercentDecoder percentDecoder = new PercentDecoder();
// Second pass: re-join the surviving segments, re-encoding each one.
for (String segment : segments) {
if (segment != null) {
// buf starts as "/", so a separator is only needed once something was appended.
if (buf.length() > 1) {
buf.append('/');
}
try {
buf.append(PercentEncoders.getMatrixEncoder(StandardCharsets.UTF_8).encode(percentDecoder.decode(segment)));
} catch (IOException e) {
// NOTE(review): the exception is swallowed, so a segment that fails to
// decode or encode is silently left out of the result.
}
}
}
if (path.endsWith("/") || path.endsWith("/.")) {
buf.append('/');
}
return buf.toString();
}
/**
 * Merges a relative reference path into a base path and normalizes the result.
 * The base path is cut after its last '/', the reference path is appended, and a leading
 * '/' is added if the merged path does not have one.
 * @param bpath the base path, may be {@code null}
 * @param cpath the reference path, may be {@code null}
 * @return {@code null} if both paths are {@code null}; the reference path with a leading '/'
 * if there is no base path; the base path if there is no reference path; otherwise the
 * normalized merged path
 */
private static String resolve(String bpath, String cpath) {
    if (bpath == null && cpath == null) {
        return null;
    }
    if (bpath == null) {
        return (!cpath.startsWith("/")) ? "/" + cpath : cpath;
    }
    if (cpath == null) {
        return bpath;
    }
    StringBuilder buf = new StringBuilder();
    // Keep the base path up to and including its last '/', i.e. drop the last segment.
    int n = bpath.lastIndexOf('/');
    if (n > -1) {
        buf.append(bpath, 0, n + 1);
    }
    buf.append(cpath);
    // The buffer is empty when the base path has no '/' and the reference path is empty
    // (e.g. base "http://host" and reference "?q"); guard before looking at the first char.
    if (buf.length() == 0 || buf.charAt(0) != '/') {
        buf.insert(0, '/');
    }
    return normalizePath(buf.toString());
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
String s = getScheme();
if (s != null && !s.isEmpty()) {
buf.append(s).append(':');
}
buf.append(getSchemeSpecificPart());
return buf.toString();
}
public String toEncodedString() throws IOException {
return PercentEncoders.getUnreservedEncoder(StandardCharsets.UTF_8).encode(toString());
}
public String toASCIIString() {
StringBuilder buf = new StringBuilder();
String s = getScheme();
if (s != null && !s.isEmpty()) {
buf.append(s).append(':');
}
buf.append(getASCIISchemeSpecificPart());
return buf.toString();
}
public String toBIDIString() {
return CharUtils.wrapBidi(toString(), CharUtils.LRE);
}
public URI toURI() throws URISyntaxException {
return new URI(toASCIIString());
}
public java.net.URL toURL() throws MalformedURLException, URISyntaxException {
return toURI().toURL();
}
/**
 * Hash code over authority, fragment, host, path, port, query, scheme and user info,
 * combined with the usual 31-based polynomial in exactly that order.
 */
@Override
public int hashCode() {
    return java.util.Objects.hash(
            builder.authority,
            builder.fragment,
            builder.host,
            builder.path,
            builder.port,
            builder.query,
            builder.scheme,
            builder.userinfo);
}
/**
 * Two IRIs are equal when they are of the same class and their authority, fragment, host,
 * path, port, query, scheme and user info are all equal (strings compared case-sensitively,
 * {@code null} only equal to {@code null}).
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    final IRIBuilder mine = builder;
    final IRIBuilder theirs = ((IRI) obj).builder;
    return java.util.Objects.equals(mine.authority, theirs.authority)
            && java.util.Objects.equals(mine.fragment, theirs.fragment)
            && java.util.Objects.equals(mine.host, theirs.host)
            && java.util.Objects.equals(mine.path, theirs.path)
            && mine.port == theirs.port
            && java.util.Objects.equals(mine.query, theirs.query)
            && java.util.Objects.equals(mine.scheme, theirs.scheme)
            && java.util.Objects.equals(mine.userinfo, theirs.userinfo);
}
/**
 * Orders IRIs by scheme (ignoring ASCII case) first. Opaque IRIs sort after hierarchical
 * ones; two opaque IRIs are ordered by scheme-specific part, then fragment.
 * Hierarchical IRIs are ordered by user info, host (ignoring ASCII case) and port when both
 * have a host, otherwise by authority; then by path, query and fragment.
 * @param that the IRI to compare with
 * @return a negative integer, zero or a positive integer
 */
@Override
public int compareTo(IRI that) {
    int c;
    if ((c = compareIgnoringCase(builder.scheme, that.builder.scheme)) != 0) {
        return c;
    }
    if (isOpaque()) {
        if (that.isOpaque()) {
            // Both opaque
            if ((c = compare(builder.schemeSpecificPart, that.builder.schemeSpecificPart)) != 0) {
                return c;
            }
            return compare(builder.fragment, that.builder.fragment);
        }
        return +1;
    } else if (that.isOpaque()) {
        return -1;
    }
    // Hierarchical
    if ((builder.host != null) && (that.builder.host != null)) {
        // Both server-based
        if ((c = compare(builder.userinfo, that.builder.userinfo)) != 0) {
            return c;
        }
        if ((c = compareIgnoringCase(builder.host, that.builder.host)) != 0) {
            return c;
        }
        // Integer.compare instead of subtraction: the builder accepts any int as port,
        // and a difference of two ints can overflow and flip the sign.
        if ((c = Integer.compare(builder.port, that.builder.port)) != 0) {
            return c;
        }
    } else {
        if ((c = compare(builder.authority, that.builder.authority)) != 0) {
            return c;
        }
    }
    if ((c = compare(builder.path, that.builder.path)) != 0) {
        return c;
    }
    if ((c = compare(builder.query, that.builder.query)) != 0) {
        return c;
    }
    return compare(builder.fragment, that.builder.fragment);
}
/**
 * Null-safe, case-sensitive string comparison in which {@code null} sorts before any
 * non-null string and two {@code null}s are equal.
 * @param s first string, may be {@code null}
 * @param t second string, may be {@code null}
 * @return a negative integer, zero or a positive integer
 */
private int compare(String s, String t) {
    if (s == null) {
        // Two absent components are equal; reporting -1 here would make
        // x.compareTo(x) non-zero whenever a component is missing.
        return t == null ? 0 : -1;
    }
    if (t == null) {
        return +1;
    }
    return s.compareTo(t);
}
/**
 * Null-safe string comparison that ignores the case of ASCII letters only.
 * {@code null} sorts before any non-null string and two {@code null}s are equal.
 * @param s first string, may be {@code null}
 * @param t second string, may be {@code null}
 * @return a negative integer, zero or a positive integer
 */
private int compareIgnoringCase(String s, String t) {
    if (s == null) {
        // Two absent components are equal; see compareTo's contract (x.compareTo(x) == 0).
        return t == null ? 0 : -1;
    }
    if (t == null) {
        return +1;
    }
    if (s.equals(t)) {
        return 0;
    }
    int sn = s.length();
    int tn = t.length();
    int n = Math.min(sn, tn);
    for (int i = 0; i < n; i++) {
        int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
        if (c != 0) {
            return c;
        }
    }
    // Common prefix is equal: the shorter string sorts first.
    return sn - tn;
}
private int toLower(char c) {
if ((c >= 'A') && (c <= 'Z')) {
return c + ('a' - 'A');
}
return c;
}
/**
*
*/
public static class IRIBuilder {
final SchemeRegistry reg = SchemeRegistry.getInstance();
Scheme schemeClass;
String scheme;
String schemeSpecificPart;
String authority;
String userinfo;
String host;
int port = -1;
String path;
String query;
String fragment;
private String asciiHost;
private String asciiAuthority;
private String asciiUserinfo;
private String asciiSchemeSpecificPart;
private String asciiPath;
private String asciiQuery;
private String asciiFragment;
private IRIBuilder() {
}
public IRIBuilder from(String string) {
parse(CharUtils.stripBidi(string));
authorityAndSchemeSpecificPart();
return this;
}
public IRIBuilder from(URI uri) {
scheme = uri.getScheme();
schemeClass = reg.getScheme(scheme);
authority = uri.getAuthority();
path = uri.getPath();
query = uri.getQuery();
fragment = uri.getFragment();
parseAuthority();
authorityAndSchemeSpecificPart();
return this;
}
public IRIBuilder from(IRI uri) {
scheme = uri.getScheme();
schemeClass = reg.getScheme(scheme);
authority = uri.getAuthority();
path = uri.getPath();
query = uri.getQuery();
fragment = uri.getFragment();
parseAuthority();
authorityAndSchemeSpecificPart();
return this;
}
/**
 * Initializes this builder from a scheme, a scheme-specific part and a fragment.
 * The scheme is stored in lower case.
 * NOTE(review): the call to authorityAndSchemeSpecificPart() below rebuilds
 * {@code schemeSpecificPart} from authority, path, query and fragment, so the value
 * passed in here is overwritten - confirm whether that is intended.
 * NOTE(review): unlike the other from(...) methods, schemeClass is not set here.
 * @param scheme the scheme, must not be {@code null}
 * @param schemeSpecificPart the scheme-specific part
 * @param fragment the fragment, may be {@code null}
 * @return this builder
 */
public IRIBuilder from(String scheme, String schemeSpecificPart, String fragment) {
this.scheme = scheme.toLowerCase();
this.schemeSpecificPart = schemeSpecificPart;
this.fragment = fragment;
authorityAndSchemeSpecificPart();
return this;
}
public IRIBuilder scheme(String scheme) {
this.scheme = scheme;
this.schemeClass = reg.getScheme(scheme);
return this;
}
public IRIBuilder schemeSpecificPart(String schemeSpecificPart) {
this.schemeSpecificPart = schemeSpecificPart;
return this;
}
public IRIBuilder curie(String prefix, String path) {
this.scheme = prefix;
this.path = path;
return this;
}
public IRIBuilder curie(String schemeAndPath) {
int pos = schemeAndPath.indexOf(':');
this.scheme = pos > 0 ? schemeAndPath.substring(0, pos) : null;
this.path = pos > 0 ? schemeAndPath.substring(pos + 1) : schemeAndPath;
return this;
}
public IRIBuilder authority(String authority) {
this.authority = authority;
return this;
}
public IRIBuilder userinfo(String userinfo) {
this.userinfo = userinfo;
return this;
}
public IRIBuilder host(String host) {
this.host = host;
return this;
}
public IRIBuilder port(int port) {
this.port = port;
return this;
}
public IRIBuilder path(String path) {
this.path = path;
return this;
}
public IRIBuilder query(String query) {
this.query = query;
return this;
}
public IRIBuilder fragment(String fragment) {
this.fragment = fragment;
return this;
}
/**
 * Creates the IRI backed by this builder.
 * If no scheme-specific part has been set or derived yet (which is the case when the
 * builder was filled through the individual setters rather than one of the
 * {@code from(...)} methods), it is derived from authority, path, query and fragment
 * first; otherwise {@link IRI#toString()} would render the literal text "null".
 * @return the IRI
 */
public IRI build() {
    if (schemeSpecificPart == null) {
        authorityAndSchemeSpecificPart();
    }
    return new IRI(this);
}
private void parse(String iri) {
try {
Matcher irim = IRIPATTERN.matcher(iri);
if (irim.find()) {
scheme = irim.group(1);
schemeClass = reg.getScheme(scheme);
authority = irim.group(2);
path = irim.group(3);
query = irim.group(4);
fragment = irim.group(5);
parseAuthority();
try {
CharUtils.verify(scheme, Profile.SCHEME);
CharUtils.verify(path, Profile.IPATH);
CharUtils.verify(query, Profile.IQUERY);
CharUtils.verify(fragment, Profile.IFRAGMENT);
} catch (InvalidCharacterException e) {
throw new IRISyntaxException(e);
}
} else {
throw new IRISyntaxException("invalid Syntax");
}
} catch (IRISyntaxException e) {
throw e;
} catch (Exception e) {
throw new IRISyntaxException(e);
}
}
/**
 * Splits {@code authority} into user info, host and port:
 * {@code [ <userinfo> '@' ] <host> [ ':' <port> ]}.
 * A host in square brackets (IP literal such as {@code [::1]}) is kept intact, including
 * the colons inside the brackets. A missing or empty port is stored as {@code -1}.
 * Does nothing if there is no authority.
 * @throws IRISyntaxException if user info or host contain invalid characters
 * @throws NumberFormatException if the port is present but not a number
 */
private void parseAuthority() {
    if (authority == null) {
        return;
    }
    int at = authority.lastIndexOf('@');
    userinfo = at >= 0 ? authority.substring(0, at) : null;
    String hostPort = at >= 0 ? authority.substring(at + 1) : authority;
    int colon;
    if (hostPort.startsWith("[")) {
        // IP literal: only a ':' directly after the closing bracket separates the port,
        // the colons inside the brackets belong to the address.
        int close = hostPort.indexOf(']');
        boolean portFollows = close >= 0
                && close + 1 < hostPort.length()
                && hostPort.charAt(close + 1) == ':';
        colon = portFollows ? close + 1 : -1;
    } else {
        colon = hostPort.indexOf(':');
    }
    host = colon >= 0 ? hostPort.substring(0, colon) : hostPort;
    String portText = colon >= 0 ? hostPort.substring(colon + 1) : "";
    // "host:" with an empty port is valid syntax and means "no port given".
    port = portText.isEmpty() ? -1 : Integer.parseInt(portText);
    try {
        CharUtils.verify(userinfo, Profile.IUSERINFO);
        CharUtils.verify(host, Profile.IHOST);
    } catch (InvalidCharacterException e) {
        throw new IRISyntaxException(e);
    }
}
private void authorityAndSchemeSpecificPart() {
if (authority == null && (userinfo != null || host != null)) {
StringBuilder buf = new StringBuilder();
buildAuthority(buf, userinfo, host, port);
authority = (buf.length() != 0) ? buf.toString() : null;
}
StringBuilder buf = new StringBuilder();
buildSchemeSpecificPart(buf, authority, path, query, fragment);
schemeSpecificPart = buf.toString();
}
private static void buildSchemeSpecificPart(StringBuilder buf, String authority, String path, String query,
String fragment) {
if (authority != null) {
buf.append("//");
buf.append(authority);
}
if (path != null && path.length() > 0) {
buf.append(path);
}
if (query != null) {
buf.append('?');
buf.append(query);
}
if (fragment != null) {
buf.append('#');
buf.append(fragment);
}
}
public String getASCIIHost() {
if (host != null && asciiHost == null) {
if (host.startsWith("[")) {
asciiHost = host;
} else {
asciiHost = IDN.toASCII(host);
}
}
return (asciiHost != null && asciiHost.length() > 0) ? asciiHost : null;
}
private String getASCIIAuthority() {
if (authority != null && asciiAuthority == null) {
asciiAuthority = buildASCIIAuthority();
}
return asciiAuthority != null && asciiAuthority.length() > 0 ? asciiAuthority : null;
}
private String buildASCIIAuthority() {
StringBuilder buf = new StringBuilder();
buildAuthority(buf, getASCIIUserInfo(), getASCIIHost(), port);
return buf.toString();
}
private static void buildAuthority(StringBuilder buf, String aui, String ah, int port) {
if (aui != null && aui.length() != 0) {
buf.append(aui);
buf.append('@');
}
if (ah != null && ah.length() != 0) {
buf.append(ah);
}
if (port != -1) {
buf.append(':');
buf.append(port);
}
}
private String getASCIIFragment() {
if (fragment != null && asciiFragment == null) {
try {
asciiFragment = PercentEncoders.getFragmentEncoder(StandardCharsets.UTF_8).encode(fragment);
} catch (IOException e) {
//logger.log(Level.FINE, e.getMessage(), e);
}
}
return asciiFragment;
}
private String getASCIIPath() {
if (path != null && asciiPath == null) {
try {
asciiPath = PercentEncoders.getPathEncoder(StandardCharsets.UTF_8).encode(path);
} catch (IOException e) {
//logger.log(Level.FINE, e.getMessage(), e);
}
}
return asciiPath;
}
public String getASCIIQuery() {
if (query != null && asciiQuery == null) {
try {
asciiQuery = PercentEncoders.getQueryEncoder(StandardCharsets.UTF_8).encode(query);
} catch (IOException e) {
//logger.log(Level.FINE, e.getMessage(), e);
}
}
return asciiQuery;
}
public String getASCIIUserInfo() {
if (userinfo != null && asciiUserinfo == null) {
try {
asciiUserinfo = PercentEncoders.getUnreservedEncoder(StandardCharsets.UTF_8).encode(userinfo);
} catch (IOException e) {
//logger.log(Level.FINE, e.getMessage(), e);
}
}
return asciiUserinfo;
}
public String getASCIISchemeSpecificPart() {
if (asciiSchemeSpecificPart == null) {
StringBuilder buf = new StringBuilder();
buildSchemeSpecificPart(buf, getASCIIAuthority(), getASCIIPath(), getASCIIQuery(), getASCIIFragment());
asciiSchemeSpecificPart = buf.toString();
}
return asciiSchemeSpecificPart;
}
}
}

View file

@ -0,0 +1,17 @@
package org.xbib.net;
/**
 * Unchecked exception reporting that a string could not be processed as an IRI.
 * Both constructors are package-private, so instances are only created inside this package.
 */
@SuppressWarnings("serial")
public class IRISyntaxException extends RuntimeException {
/**
 * Creates the exception with a description of the problem.
 * @param message the detail message
 */
IRISyntaxException(String message) {
super(message);
}
/**
 * Creates the exception wrapping the underlying failure.
 * @param cause the cause
 */
IRISyntaxException(Throwable cause) {
super(cause);
}
}

View file

@ -0,0 +1,28 @@
package org.xbib.net.util;
/**
 * A {@link CodepointIterator} that reads its chars from a char array.
 */
class CharArrayCodepointIterator extends CodepointIterator {

    protected char[] buffer;

    /**
     * Iterates over the whole array.
     * @param buffer the chars to iterate over
     */
    CharArrayCodepointIterator(char[] buffer) {
        this(buffer, 0, buffer.length);
    }

    /**
     * Iterates over a part of the array.
     * NOTE(review): the limit is the smaller of {@code buffer.length - n} and {@code e},
     * yet it is compared against absolute positions below - confirm that this is the
     * intended meaning of {@code e} for a start offset other than 0.
     * @param buffer the chars to iterate over
     * @param n the start position
     * @param e the upper bound used to derive the limit
     */
    CharArrayCodepointIterator(char[] buffer, int n, int e) {
        this.buffer = buffer;
        this.position = n;
        this.limit = Math.min(buffer.length - n, e);
    }

    /**
     * Returns the char at the current position and advances, or {@code (char) -1}
     * once the limit has been reached.
     */
    @Override
    protected char get() {
        if (position >= limit) {
            return (char) -1;
        }
        return buffer[position++];
    }

    /**
     * Returns the char at the given index without moving the position.
     * @throws ArrayIndexOutOfBoundsException if the index is negative or not below the limit
     */
    @Override
    protected char get(int index) {
        if (index >= 0 && index < limit) {
            return buffer[index];
        }
        throw new ArrayIndexOutOfBoundsException(index);
    }
}

View file

@ -0,0 +1,25 @@
package org.xbib.net.util;
/**
 * A {@link CodepointIterator} that reads its chars from a {@link CharSequence}.
 */
class CharSequenceCodepointIterator extends CodepointIterator {
private final CharSequence buffer;
/**
 * Iterates over the whole sequence.
 * @param buffer the chars to iterate over
 */
CharSequenceCodepointIterator(CharSequence buffer) {
this(buffer, 0, buffer.length());
}
/**
 * Iterates over a part of the sequence.
 * NOTE(review): the limit is the smaller of {@code buffer.length() - n} and {@code e} -
 * confirm that this is the intended meaning of {@code e} for a start offset other than 0.
 * @param buffer the chars to iterate over
 * @param n the start position
 * @param e the upper bound used to derive the limit
 */
CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
this.buffer = buffer;
this.position = n;
this.limit = Math.min(buffer.length() - n, e);
}
/**
 * Returns the char at the current position and advances.
 * There is no check against the limit here; reading past the end of the sequence
 * fails with whatever {@code charAt} throws.
 */
@Override
protected char get() {
return buffer.charAt(position++);
}
/**
 * Returns the char at the given index without moving the position.
 */
@Override
protected char get(int index) {
return buffer.charAt(index);
}
}

View file

@ -0,0 +1,597 @@
package org.xbib.net.util;
/**
* General utilities for dealing with Unicode characters.
*/
public final class CharUtils {
public static final char LRE = 0x202A;
public static final char RLE = 0x202B;
public static final char LRO = 0x202D;
public static final char RLO = 0x202E;
public static final char LRM = 0x200E;
public static final char RLM = 0x200F;
public static final char PDF = 0x202C;
private CharUtils() {
}
/**
* True if the character is a valid unicode codepoint.
* @param c char
* @return true if the character is a valid unicode codepoint
*/
public static boolean isValid(int c) {
return c >= 0x000000 && c <= 0x10ffff;
}
/**
* True if the character is a valid unicode codepoint.
* @param c code point
* @return true if the character is a valid unicode codepoint
*/
public static boolean isValid(Codepoint c) {
return isValid(c.getValue());
}
/**
* True if all the characters in chars are within the set [low,high].
* @param chars chars
* @param low low
* @param high high
* @return true if all the characters in chars are within the set [low,high]
*/
public static boolean inRange(char[] chars, char low, char high) {
for (char aChar : chars) {
if (aChar < low || aChar > high) {
return false;
}
}
return true;
}
/**
* True if all the characters in chars are within the set [low,high].
* @param chars chars
* @param low low
* @param high high
* @return true if all the characters in chars are within the set [low,high]
*/
public static boolean inRange(char[] chars, int low, int high) {
for (int i = 0; i < chars.length; i++) {
char n = chars[i];
Codepoint cp =
(isHighSurrogate(n) && i + 1 < chars.length && isLowSurrogate(chars[i + 1]))
? toSupplementary(n, chars[i++]) : new Codepoint(n);
int c = cp.getValue();
if (c < low || c > high) {
return false;
}
}
return true;
}
/**
* True if the codepoint is within the set [low,high].
* @param codepoint code point
* @param low low
* @param high high
* @return true if the codepoint is within the set [low,high]
*/
public static boolean inRange(int codepoint, int low, int high) {
return codepoint >= low && codepoint <= high;
}
/**
* Get the high surrogate for a particular unicode codepoint.
* @param c char
* @return high surrugate
*/
public static char getHighSurrogate(int c) {
return c >= 0x10000 ? (char) ((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
}
/**
* Get the low surrogate for a particular unicode codepoint.
* @param c char
* @return low surrogate
*/
public static char getLowSurrogate(int c) {
return c >= 0x10000 ? (char) (0xDC00 + (c & 0x3FF)) : (char) c;
}
/**
* True if the specified char is a high surrogate.
* @param c char
* @return true if the specified char is a high surrogate
*/
public static boolean isHighSurrogate(char c) {
return c <= '\uDBFF' && c >= '\uD800';
}
/**
* True if the specified char is a low surrogate.
* @param c char
* @return true if the specified char is a low surrogate
*/
public static boolean isLowSurrogate(char c) {
return c <= '\uDFFF' && c >= '\uDC00';
}
/**
* True if the specified character is supplemental.
* @param c char
* @return true if the specified character is supplemental
*/
public static boolean isSupplementary(int c) {
return c <= 0x10ffff && c >= 0x010000;
}
/**
* True if the two chars represent a surrogate pair.
* @param high high char
* @param low low char
* @return true if the two chars represent a surrogate pair
*/
public static boolean isSurrogatePair(char high, char low) {
return isHighSurrogate(high) && isLowSurrogate(low);
}
/**
* Converts the high and low surrogate into a supplementary codepoint.
* @param high high char
* @param low low char
* @return code point
*/
public static Codepoint toSupplementary(char high, char low) {
if (!isHighSurrogate(high)) {
throw new IllegalArgumentException("Invalid High Surrogate");
}
if (!isLowSurrogate(low)) {
throw new IllegalArgumentException("Invalid Low Surrogate");
}
return new Codepoint(((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000);
}
/**
* Return the codepoint at the given location, automatically dealing with surrogate pairs.
* @param s string
* @param i location
* @return code point
*/
public static Codepoint codepointAt(String s, int i) {
char c = s.charAt(i);
if (c < 0xD800 || c > 0xDFFF) {
return new Codepoint(c);
}
if (isHighSurrogate(c) && s.length() != i) {
char low = s.charAt(i + 1);
if (isLowSurrogate(low)) {
return toSupplementary(c, low);
}
} else if (isLowSurrogate(c) && i >= 1) {
char high = s.charAt(i - 1);
if (isHighSurrogate(high)) {
return toSupplementary(high, c);
}
}
return new Codepoint(c);
}
/**
* Return the number of characters used to represent the codepoint (will return 1 or 2).
* @param c code point
* @return the number of characters used to represent the codepoint
*/
public static int length(Codepoint c) {
return c.getCharCount();
}
/**
* Return the number of characters used to represent the codepoint (will return 1 or 2).
* @param c code point
* @return the number of characters used to represent the codepoint
*/
public static int length(int c) {
return new Codepoint(c).getCharCount();
}
/**
 * Return the total number of codepoints in the char sequence, counted by iterating over it
 * with a {@link CodepointIterator}.
 * @param c char sequence
 * @return the total number of codepoints in the char sequence
 */
public static int length(CharSequence c) {
return length(CodepointIterator.forCharSequence(c));
}
/**
* Return the total number of codepoints in the buffer. Each surrogate pair counts as a single codepoint.
* @param c chars
* @return the total number of codepoints in the buffer
*/
public static int length(char[] c) {
return length(CodepointIterator.forCharArray(c));
}
private static int length(CodepointIterator ci) {
int n = 0;
while (ci.hasNext()) {
ci.next();
n++;
}
return n;
}
private static String supplementaryToString(int c) {
return String.valueOf(getHighSurrogate(c)) + getLowSurrogate(c);
}
/**
* Return the String representation of the codepoint, automatically dealing with surrogate pairs.
* @param c char
* @return string representation of the codepoint
*/
public static String toString(int c) {
return isSupplementary(c) ? supplementaryToString(c) : String.valueOf((char) c);
}
/**
* Removes leading and trailing bidi controls from the string.
* @param string string
* @return string without bidi controls
*/
public static String stripBidi(String string) {
String s = string;
if (s == null || s.length() <= 1) {
return s;
}
if (isBidi(s.charAt(0))) {
s = s.substring(1);
}
if (isBidi(s.charAt(s.length() - 1))) {
s = s.substring(0, s.length() - 1);
}
return s;
}
private static String wrap(String s, char c1, char c2) {
StringBuilder buf = new StringBuilder(s);
if (buf.length() > 1) {
if (buf.charAt(0) != c1) {
buf.insert(0, c1);
}
if (buf.charAt(buf.length() - 1) != c2) {
buf.append(c2);
}
}
return buf.toString();
}
/**
* Wrap the string with the specified bidi control.
* @param s string
* @param c char
* @return string with specified bidi control
*/
public static String wrapBidi(String s, char c) {
switch (c) {
case RLE:
return wrap(s, RLE, PDF);
case RLO:
return wrap(s, RLO, PDF);
case LRE:
return wrap(s, LRE, PDF);
case LRO:
return wrap(s, LRO, PDF);
case RLM:
return wrap(s, RLM, RLM);
case LRM:
return wrap(s, LRM, LRM);
default:
return s;
}
}
/**
* True if the codepoint is a digit.
* @param codepoint code point
* @return true if the codepoint is a digit
*/
public static boolean isDigit(int codepoint) {
return inRange(codepoint, '0', '9');
}
/**
 * True if the codepoint is part of the ASCII alphabet (a-z, A-Z).
 * @param codepoint code point
 * @return true if the codepoint is an ASCII letter
 */
public static boolean isAlpha(int codepoint) {
return inRange(codepoint, 'A', 'Z') || inRange(codepoint, 'a', 'z');
}
/**
 * True if the codepoint is an ASCII letter or a digit, i.e. if either isAlpha or isDigit returns true.
 * @param codepoint code point
 * @return true if isAlpha or isDigit returns true
 */
public static boolean isAlphaDigit(int codepoint) {
return isDigit(codepoint) || isAlpha(codepoint);
}
public static boolean isHex(int codepoint) {
return isDigit(codepoint) || inRange(codepoint, 'a', 'f') || inRange(codepoint, 'A', 'F');
}
/**
* True if the codepoint is a bidi control character.
* @param codepoint code point
* @return true if the codepoint is a bidi control character
*/
public static boolean isBidi(int codepoint) {
return codepoint == LRM ||
codepoint == RLM ||
codepoint == LRE ||
codepoint == RLE ||
codepoint == LRO ||
codepoint == RLO ||
codepoint == PDF;
}
public static boolean isPctEnc(int codepoint) {
return codepoint == '%' || isDigit(codepoint) ||
inRange(codepoint, 'A', 'F') ||
inRange(codepoint, 'a', 'f');
}
public static boolean isMark(int codepoint) {
return codepoint == '-' ||
codepoint == '_' ||
codepoint == '.' ||
codepoint == '!' ||
codepoint == '~' ||
codepoint == '*' ||
codepoint == '\\' ||
codepoint == '\'' ||
codepoint == '(' ||
codepoint == ')';
}
public static boolean isUnreserved(int codepoint) {
return isAlphaDigit(codepoint) ||
codepoint == '-' ||
codepoint == '.' ||
codepoint == '_' ||
codepoint == '~';
}
public static boolean isReserved(int codepoint) {
return codepoint == '$' ||
codepoint == '&' ||
codepoint == '+' ||
codepoint == ',' ||
codepoint == '/' ||
codepoint == ':' ||
codepoint == ';' ||
codepoint == '=' ||
codepoint == '?' ||
codepoint == '@' ||
codepoint == '[' ||
codepoint == ']';
}
public static boolean isGenDelim(int codepoint) {
return codepoint == '#' || codepoint == '/'
|| codepoint == ':'
|| codepoint == '?'
|| codepoint == '@'
|| codepoint == '['
|| codepoint == ']';
}
public static boolean isSubDelim(int codepoint) {
return codepoint == '!' ||
codepoint == '$' ||
codepoint == '&' ||
codepoint == '\'' ||
codepoint == '(' ||
codepoint == ')' ||
codepoint == '*' ||
codepoint == '+' ||
codepoint == ',' ||
codepoint == ';' ||
codepoint == '=' ||
codepoint == '\\';
}
public static boolean isPchar(int codepoint) {
return isUnreserved(codepoint) || codepoint == ':'
|| codepoint == '@'
|| codepoint == '&'
|| codepoint == '='
|| codepoint == '+'
|| codepoint == '$'
|| codepoint == ',';
}
public static boolean isPath(int codepoint) {
return isPchar(codepoint) || codepoint == ';' || codepoint == '/' || codepoint == '%' || codepoint == ',';
}
public static boolean isPathNoDelims(int codepoint) {
return isPath(codepoint) && !isGenDelim(codepoint);
}
public static boolean isScheme(int codepoint) {
return isAlphaDigit(codepoint) || codepoint == '+' || codepoint == '-' || codepoint == '.';
}
public static boolean isUserInfo(int codepoint) {
return isUnreserved(codepoint) || isSubDelim(codepoint) || isPctEnc(codepoint);
}
public static boolean isQuery(int codepoint) {
return isPchar(codepoint) || codepoint == ';' || codepoint == '/' || codepoint == '?' || codepoint == '%';
}
public static boolean isFragment(int codepoint) {
return isPchar(codepoint) || codepoint == '/' || codepoint == '?' || codepoint == '%';
}
public static boolean isUcsChar(int codepoint) {
return inRange(codepoint, '\u00A0', '\uD7FF') ||
inRange(codepoint, '\uF900', '\uFDCF') ||
inRange(codepoint, '\uFDF0', '\uFFEF') ||
inRange(codepoint, 0x10000, 0x1FFFD) ||
inRange(codepoint, 0x20000, 0x2FFFD) ||
inRange(codepoint, 0x30000, 0x3FFFD) ||
inRange(codepoint, 0x40000, 0x4FFFD) ||
inRange(codepoint, 0x50000, 0x5FFFD) ||
inRange(codepoint, 0x60000, 0x6FFFD) ||
inRange(codepoint, 0x70000, 0x7FFFD) ||
inRange(codepoint, 0x80000, 0x8FFFD) ||
inRange(codepoint, 0x90000, 0x9FFFD) ||
inRange(codepoint, 0xA0000, 0xAFFFD) ||
inRange(codepoint, 0xB0000, 0xBFFFD) ||
inRange(codepoint, 0xC0000, 0xCFFFD) ||
inRange(codepoint, 0xD0000, 0xDFFFD) ||
inRange(codepoint, 0xE1000, 0xEFFFD);
}
public static boolean isIprivate(int codepoint) {
return inRange(codepoint, '\uE000', '\uF8FF') ||
inRange(codepoint, 0xF0000, 0xFFFFD) ||
inRange(codepoint, 0x100000, 0x10FFFD);
}
public static boolean isIunreserved(int codepoint) {
return isAlphaDigit(codepoint) || isMark(codepoint) || isUcsChar(codepoint);
}
public static boolean isIpchar(int codepoint) {
return isIunreserved(codepoint) ||
isSubDelim(codepoint) ||
codepoint == ':' ||
codepoint == '@' ||
codepoint == '&' ||
codepoint == '=' ||
codepoint == '+' ||
codepoint == '$';
}
public static boolean isIpath(int codepoint) {
return isIpchar(codepoint) ||
codepoint == ';' ||
codepoint == '/' ||
codepoint == '%' ||
codepoint == ',';
}
/**
 * Tests whether a codepoint is accepted by {@code isIpath} and is not
 * classified as a general delimiter by {@code isGenDelim}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is a path character that is not a general delimiter
 */
public static boolean isIpathnodelims(int codepoint) {
    if (!isIpath(codepoint)) {
        return false;
    }
    return !isGenDelim(codepoint);
}
/**
 * Tests whether a codepoint is accepted in an IRI query: anything
 * {@code isIpchar} or {@code isIprivate} accepts, plus
 * {@code ';'}, {@code '/'}, {@code '?'} and {@code '%'}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isIquery(int codepoint) {
    if (isIpchar(codepoint)) {
        return true;
    }
    if (isIprivate(codepoint)) {
        return true;
    }
    switch (codepoint) {
        case ';':
        case '/':
        case '?':
        case '%':
            return true;
        default:
            return false;
    }
}
/**
 * Tests whether a codepoint is accepted in an IRI fragment: anything
 * {@code isIpchar} or {@code isIprivate} accepts, plus
 * {@code '/'}, {@code '?'} and {@code '%'}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isIfragment(int codepoint) {
    if (isIpchar(codepoint)) {
        return true;
    }
    if (isIprivate(codepoint)) {
        return true;
    }
    switch (codepoint) {
        case '/':
        case '?':
        case '%':
            return true;
        default:
            return false;
    }
}
/**
 * Tests whether a codepoint is accepted in an IRI registered name: anything
 * {@code isIunreserved} accepts, plus the punctuation characters
 * {@code ! $ & ' ( ) * + , ; = "}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isIregname(int codepoint) {
    if (isIunreserved(codepoint)) {
        return true;
    }
    switch (codepoint) {
        case '!':
        case '$':
        case '&':
        case '\'':
        case '(':
        case ')':
        case '*':
        case '+':
        case ',':
        case ';':
        case '=':
        case '"':
            return true;
        default:
            return false;
    }
}
/**
 * Tests whether a codepoint is accepted in a bracketed IP literal: anything
 * {@code isHex} accepts, plus {@code ':'}, {@code '['} and {@code ']'}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isIpliteral(int codepoint) {
    if (isHex(codepoint)) {
        return true;
    }
    switch (codepoint) {
        case ':':
        case '[':
        case ']':
            return true;
        default:
            return false;
    }
}
/**
 * Tests whether a codepoint is accepted in an IRI host, i.e. whether
 * {@code isIregname} or {@code isIpliteral} accepts it.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isIhost(int codepoint) {
    if (isIregname(codepoint)) {
        return true;
    }
    return isIpliteral(codepoint);
}
/**
 * Tests whether a codepoint is accepted in a URI registered name: anything
 * {@code isUnreserved} accepts, plus the punctuation characters
 * {@code ! $ & ' ( ) * + , ; = "}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isRegname(int codepoint) {
    if (isUnreserved(codepoint)) {
        return true;
    }
    switch (codepoint) {
        case '!':
        case '$':
        case '&':
        case '\'':
        case '(':
        case ')':
        case '*':
        case '+':
        case ',':
        case ';':
        case '=':
        case '"':
            return true;
        default:
            return false;
    }
}
/**
 * Tests whether a codepoint is accepted in an IRI userinfo component: anything
 * {@code isIunreserved} accepts, plus {@code ; : & = + $ ,}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isIuserinfo(int codepoint) {
    if (isIunreserved(codepoint)) {
        return true;
    }
    switch (codepoint) {
        case ';':
        case ':':
        case '&':
        case '=':
        case '+':
        case '$':
        case ',':
            return true;
        default:
            return false;
    }
}
/**
 * Tests whether a codepoint is accepted in an IRI server (authority) part:
 * anything {@code isIuserinfo}, {@code isIregname} or {@code isAlphaDigit}
 * accepts, plus {@code . : @ [ ] % -}.
 * @param codepoint codepoint to test
 * @return true if the codepoint is accepted
 */
public static boolean isIserver(int codepoint) {
    if (isIuserinfo(codepoint)) {
        return true;
    }
    if (isIregname(codepoint)) {
        return true;
    }
    if (isAlphaDigit(codepoint)) {
        return true;
    }
    switch (codepoint) {
        case '.':
        case ':':
        case '@':
        case '[':
        case ']':
        case '%':
        case '-':
            return true;
        default:
            return false;
    }
}
/**
 * Verifies a sequence of codepoints against a profile by draining a restricted
 * (non-scanning) view of the iterator built from the profile's filter.
 * @param ci code point iterator to consume
 * @param profile profile whose filter is applied to every codepoint
 * @throws InvalidCharacterException raised by the restricted iterator for the
 *         first codepoint for which the profile's filter returns true
 */
public static void verify(CodepointIterator ci, Profile profile) {
    final CodepointIterator checked = CodepointIterator.restrict(ci, profile.filter());
    // Each next() call performs the check; the returned codepoints are not needed.
    for (boolean more = checked.hasNext(); more; more = checked.hasNext()) {
        checked.next();
    }
}
/**
 * Verifies the codepoints of a string against a profile.
 * A {@code null} string is accepted without any check.
 * @param s string to verify, may be null
 * @param profile profile to verify against
 */
public static void verify(String s, Profile profile) {
    if (s != null) {
        verify(CodepointIterator.forCharSequence(s), profile);
    }
}
}

View file

@ -0,0 +1,92 @@
package org.xbib.net.util;
/**
 * Represents a single Unicode codepoint.
 * Instances are immutable; the value is validated when the object is created.
 */
public class Codepoint implements Comparable<Codepoint> {

    private final int value;

    /**
     * Create a codepoint from a single char.
     * @param value char
     */
    public Codepoint(char value) {
        this((int) value);
    }

    /**
     * Create a codepoint from a specific integer value.
     * @param value value, must lie in the range U+0000 to U+10FFFF
     * @throws IllegalArgumentException if the value is negative or above U+10FFFF
     */
    public Codepoint(int value) {
        // Reject values above the Unicode maximum as well as negative ones;
        // next() already treats 0x10FFFF as the last possible codepoint.
        if (!Character.isValidCodePoint(value)) {
            throw new IllegalArgumentException("invalid codepoint");
        }
        this.value = value;
    }

    /**
     * The codepoint value.
     * @return value
     */
    public int getValue() {
        return value;
    }

    /**
     * Orders codepoints by their numeric value.
     * @param o the codepoint to compare with
     * @return negative, zero or positive if this value is smaller than, equal to or larger than the other
     */
    @Override
    public int compareTo(Codepoint o) {
        return Integer.compare(value, o.value);
    }

    /**
     * Returns the string form produced by {@code CharUtils.toString} for this value.
     * @return string representation of the codepoint
     */
    @Override
    public String toString() {
        return CharUtils.toString(value);
    }

    /**
     * Returns the chars of {@link #toString()}.
     * @return char array representation of the codepoint
     */
    public char[] toChars() {
        return toString().toCharArray();
    }

    /**
     * Get the number of chars necessary to represent this codepoint. Returns 2 if this is a supplementary codepoint.
     * @return char count
     */
    public int getCharCount() {
        // Computed directly from the value; no temporary String or char[] is needed.
        return Character.charCount(value);
    }

    @Override
    public int hashCode() {
        return 31 + value;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        return value == ((Codepoint) obj).value;
    }

    /**
     * Get the next codepoint.
     * @return next code point
     * @throws IndexOutOfBoundsException if this is already the highest codepoint (U+10FFFF)
     */
    public Codepoint next() {
        if (value == Character.MAX_CODE_POINT) {
            throw new IndexOutOfBoundsException();
        }
        return new Codepoint(value + 1);
    }
}

View file

@ -0,0 +1,10 @@
package org.xbib.net.util;
/**
 * Filters are used in a variety of ways to filter or verify unicode codepoints.
 * <p>
 * Note on polarity: the filters defined in {@code Profile} return {@code true} for
 * codepoints that are NOT members of the profile's character class, and a
 * non-inverted {@code RestrictedCodepointIterator} treats a {@code true} result
 * as a violation (it throws, or stops when scanning).
 */
@FunctionalInterface
public interface CodepointFilter {
/**
 * Tests a single codepoint against this filter.
 * @param ch the codepoint to test
 * @return the filter's verdict for the codepoint; see the class comment for how callers interpret it
 */
boolean accept(int ch);
}

View file

@ -0,0 +1,268 @@
package org.xbib.net.util;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
 * Provides an iterator over Unicode Codepoints.
 * The iterator reads UTF-16 chars through {@link #get()} and {@link #get(int)}
 * and combines surrogate pairs into single codepoints.
 */
public abstract class CodepointIterator implements Iterator<Codepoint> {

    /** Current char index; -1 until a subclass initialises it. */
    protected int position = -1;

    /** Char index one past the last readable char; -1 until a subclass initialises it. */
    protected int limit = -1;

    public CodepointIterator() {
    }

    /**
     * Get a CodepointIterator for the specified char array.
     * @param array char array
     * @return code point iterator
     */
    public static CodepointIterator forCharArray(char[] array) {
        return new CharArrayCodepointIterator(array);
    }

    /**
     * Get a CodepointIterator for the specified CharSequence.
     * @param seq char sequence
     * @return code point iterator
     */
    public static CodepointIterator forCharSequence(CharSequence seq) {
        return new CharSequenceCodepointIterator(seq);
    }

    /**
     * Wrap an iterator so that every codepoint is checked against a filter (non-scanning mode).
     * @param ci iterator to wrap
     * @param filter filter to apply
     * @return restricted iterator
     */
    public static CodepointIterator restrict(CodepointIterator ci, CodepointFilter filter) {
        return new RestrictedCodepointIterator(ci, filter, false);
    }

    /**
     * Wrap an iterator so that every codepoint is checked against a filter.
     * @param ci iterator to wrap
     * @param filter filter to apply
     * @param scanning true to stop quietly at the first rejected codepoint instead of throwing
     * @return restricted iterator
     */
    public static CodepointIterator restrict(CodepointIterator ci, CodepointFilter filter, boolean scanning) {
        return new RestrictedCodepointIterator(ci, filter, scanning);
    }

    /**
     * Wrap an iterator so that every codepoint is checked against a filter.
     * @param ci iterator to wrap
     * @param filter filter to apply
     * @param scanning true to stop quietly at the first rejected codepoint instead of throwing
     * @param invert true to invert the filter's verdict
     * @return restricted iterator
     */
    public static CodepointIterator restrict(CodepointIterator ci, CodepointFilter filter, boolean scanning, boolean invert) {
        return new RestrictedCodepointIterator(ci, filter, scanning, invert);
    }

    /**
     * Restrict this iterator with the given filter (non-scanning mode).
     * @param filter filter to apply
     * @return restricted iterator
     */
    public CodepointIterator restrict(CodepointFilter filter) {
        return restrict(this, filter);
    }

    /**
     * Restrict this iterator with the given filter.
     * @param filter filter to apply
     * @param scanning true to stop quietly at the first rejected codepoint instead of throwing
     * @return restricted iterator
     */
    public CodepointIterator restrict(CodepointFilter filter, boolean scanning) {
        return restrict(this, filter, scanning);
    }

    /**
     * Restrict this iterator with the given filter.
     * @param filter filter to apply
     * @param scanning true to stop quietly at the first rejected codepoint instead of throwing
     * @param invert true to invert the filter's verdict
     * @return restricted iterator
     */
    public CodepointIterator restrict(CodepointFilter filter, boolean scanning, boolean invert) {
        return restrict(this, filter, scanning, invert);
    }

    /**
     * Get the next char.
     * NOTE(review): {@link #nextChars()} relies on this call advancing the position
     * by one char; confirm against the concrete subclasses.
     * @return char
     */
    protected abstract char get();

    /**
     * Get the specified char.
     * @param index index
     * @return char
     */
    protected abstract char get(int index);

    /**
     * Checks if there are codepoints remaining.
     * @return true if there are codepoints remaining
     */
    @Override
    public boolean hasNext() {
        return remaining() > 0;
    }

    /**
     * Return the final index position.
     * @return -1 if the position is negative, the position itself if it has reached the limit,
     *         otherwise the position minus one
     */
    public int lastPosition() {
        int p = position();
        return (p > -1) ? ((p >= limit()) ? p : p - 1) : -1;
    }

    /**
     * Return the next chars. If the codepoint is not supplemental, the char array will have a single member. If the
     * codepoint is supplemental, the char array will have two members, representing the high and low surrogate chars
     * (in that order). A surrogate that has no partner inside the sequence bounds is returned on its own.
     * @return next chars, or null if nothing remains
     * @throws InvalidCharacterException if a surrogate is followed or preceded by a char that does not complete the pair
     */
    public char[] nextChars() {
        if (!hasNext()) {
            return null;
        }
        if (!isNextSurrogate()) {
            return new char[]{get()};
        }
        char c1 = get();
        if (CharUtils.isHighSurrogate(c1) && position() < limit()) {
            char c2 = get();
            if (CharUtils.isLowSurrogate(c2)) {
                return new char[]{c1, c2};
            }
            throw new InvalidCharacterException(c2);
        }
        // After get() the low surrogate sits at position() - 1, so its partner is at
        // position() - 2, which only exists when position() is at least 2.
        if (CharUtils.isLowSurrogate(c1) && position() > 1) {
            char c2 = get(position() - 2);
            if (CharUtils.isHighSurrogate(c2)) {
                // High surrogate first, matching peekChars and toCodepoint.
                return new char[]{c2, c1};
            }
            throw new InvalidCharacterException(c2);
        }
        // Unpaired surrogate at a sequence boundary: hand back the char that was read
        // instead of calling get() again and losing it.
        return new char[]{c1};
    }

    /**
     * Peek the next chars in the iterator. If the codepoint is not supplemental, the char array will have a single
     * member. If the codepoint is supplemental, the char array will have two members, representing the high and low
     * surrogate chars.
     * @return chars
     */
    public char[] peekChars() {
        return peekChars(position());
    }

    /**
     * Peek the specified chars in the iterator. If the codepoint is not supplemental, the char array will have a single
     * member. If the codepoint is supplemental, the char array will have two members, representing the high and low
     * surrogate chars.
     * @param pos char index to peek at
     * @return chars, or null if the index is outside the range from 0 (inclusive) to the limit (exclusive)
     * @throws InvalidCharacterException if a surrogate is followed or preceded by a char that does not complete the pair
     */
    private char[] peekChars(int pos) {
        if (pos < 0 || pos >= limit()) {
            return null;
        }
        char c1 = get(pos);
        // Only look at pos + 1 when it is still inside the sequence.
        if (CharUtils.isHighSurrogate(c1) && pos + 1 < limit()) {
            char c2 = get(pos + 1);
            if (CharUtils.isLowSurrogate(c2)) {
                return new char[]{c1, c2};
            }
            throw new InvalidCharacterException(c2);
        }
        // A preceding char exists as soon as pos is at least 1.
        if (CharUtils.isLowSurrogate(c1) && pos > 0) {
            char c2 = get(pos - 1);
            if (CharUtils.isHighSurrogate(c2)) {
                return new char[]{c2, c1};
            }
            throw new InvalidCharacterException(c2);
        }
        return new char[]{c1};
    }

    /**
     * Return the next codepoint.
     * @return code point
     * @throws NoSuchElementException if nothing remains
     */
    @Override
    public Codepoint next() {
        if (remaining() > 0) {
            return toCodepoint(nextChars());
        }
        throw new NoSuchElementException();
    }

    /**
     * Peek the next codepoint.
     * @return code point, or null if the current position is outside the readable range
     */
    public Codepoint peek() {
        return toCodepoint(peekChars());
    }

    /**
     * Peek the specified codepoint.
     * @param index index
     * @return code point, or null if the index is outside the readable range
     */
    public Codepoint peek(int index) {
        return toCodepoint(peekChars(index));
    }

    /** Converts one char or a (high, low) surrogate pair into a codepoint; null stays null. */
    private Codepoint toCodepoint(char[] chars) {
        if (chars == null) {
            return null;
        }
        return (chars.length == 1) ? new Codepoint(chars[0]) : CharUtils.toSupplementary(chars[0], chars[1]);
    }

    /**
     * Set the iterator position.
     * @param n iterator position, from 0 up to and including the limit
     * @throws ArrayIndexOutOfBoundsException if the position is negative or beyond the limit
     */
    public void position(int n) {
        if (n < 0 || n > limit()) {
            throw new ArrayIndexOutOfBoundsException(n);
        }
        position = n;
    }

    /**
     * Get the iterator position.
     * @return position
     */
    public int position() {
        return position;
    }

    /**
     * Return the iterator limit.
     * @return limit
     */
    public int limit() {
        return limit;
    }

    /**
     * Return the remaining iterator size.
     * @return remaining size
     */
    public int remaining() {
        return limit - position();
    }

    /** Returns true if a char remains and the char at the current position is a surrogate. */
    private boolean isNextSurrogate() {
        if (!hasNext()) {
            return false;
        }
        char c = get(position());
        return CharUtils.isHighSurrogate(c) || CharUtils.isLowSurrogate(c);
    }

    /**
     * Returns true if the char at the specified index is a high surrogate.
     * @param index index
     * @return true if the char at the specified index is a high surrogate
     * @throws ArrayIndexOutOfBoundsException if the index is negative or not below the limit
     */
    public boolean isHigh(int index) {
        // The limit itself is not a readable index.
        if (index < 0 || index >= limit()) {
            throw new ArrayIndexOutOfBoundsException(index);
        }
        return CharUtils.isHighSurrogate(get(index));
    }

    /**
     * Returns true if the char at the specified index is a low surrogate.
     * @param index index
     * @return true if the char at the specified index is a low surrogate
     * @throws ArrayIndexOutOfBoundsException if the index is negative or not below the limit
     */
    public boolean isLow(int index) {
        // The limit itself is not a readable index.
        if (index < 0 || index >= limit()) {
            throw new ArrayIndexOutOfBoundsException(index);
        }
        return CharUtils.isLowSurrogate(get(index));
    }

    /**
     * Removal is not supported.
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}

View file

@ -0,0 +1,92 @@
package org.xbib.net.util;
import java.util.NoSuchElementException;
/**
 * Base implementation of a CodepointIterator that filters the output of another CodepointIterator.
 * Every operation is forwarded to the wrapped iterator, which also owns position and limit.
 */
public abstract class DelegatingCodepointIterator extends CodepointIterator {

    private final CodepointIterator internal;

    /**
     * @param internal the iterator all calls are forwarded to
     */
    protected DelegatingCodepointIterator(CodepointIterator internal) {
        this.internal = internal;
    }

    @Override
    protected char get() {
        return internal.get();
    }

    @Override
    protected char get(int index) {
        return internal.get(index);
    }

    @Override
    public boolean hasNext() {
        return internal.hasNext();
    }

    @Override
    public boolean isHigh(int index) {
        return internal.isHigh(index);
    }

    @Override
    public boolean isLow(int index) {
        return internal.isLow(index);
    }

    @Override
    public int limit() {
        return internal.limit();
    }

    /**
     * Returns the next codepoint of the wrapped iterator.
     * @return next codepoint
     * @throws NoSuchElementException if the wrapped iterator has nothing left
     */
    @Override
    public Codepoint next() {
        // Ask the wrapped iterator directly rather than relying on a flag cached by a
        // previous hasNext() call, so next() also works when hasNext() was never called.
        // The overridable hasNext() is deliberately not used here: subclasses may narrow it.
        if (!internal.hasNext()) {
            throw new NoSuchElementException();
        }
        return internal.next();
    }

    @Override
    public char[] nextChars() {
        return internal.nextChars();
    }

    @Override
    public Codepoint peek() {
        return internal.peek();
    }

    @Override
    public Codepoint peek(int index) {
        return internal.peek(index);
    }

    @Override
    public char[] peekChars() {
        return internal.peekChars();
    }

    @Override
    public int position() {
        return internal.position();
    }

    @Override
    public int remaining() {
        return internal.remaining();
    }

    @Override
    public void position(int position) {
        internal.position(position);
    }
}

View file

@ -0,0 +1,17 @@
package org.xbib.net.util;
/**
 * Thrown when a char or codepoint is not acceptable in the current context.
 * The offending value is reported in the message, in hexadecimal and as text.
 */
@SuppressWarnings("serial")
public class InvalidCharacterException extends RuntimeException {

    private final int input;

    /**
     * @param input the offending char or codepoint value
     */
    public InvalidCharacterException(int input) {
        this.input = input;
    }

    /**
     * Builds the message from the offending value.
     * @return {@code "Invalid Character 0x<hex>(<text>)"}
     */
    @Override
    public String getMessage() {
        // A plain (char) cast would truncate supplementary codepoints to 16 bits, so
        // valid codepoints are rendered through Character.toChars. Values that are not
        // valid codepoints keep the cast, which is all that can be shown for them.
        String shown = Character.isValidCodePoint(input)
                ? new String(Character.toChars(input))
                : String.valueOf((char) input);
        return "Invalid Character 0x" + Integer.toHexString(input) + "(" + shown + ")";
    }
}

View file

@ -0,0 +1,54 @@
package org.xbib.net.util;
/**
 * Named codepoint filters built on the character classes of {@code CharUtils}.
 * <p>
 * Apart from {@link #NONE}, whose filter returns {@code true} for every codepoint,
 * each filter returns {@code true} for codepoints that are NOT part of the class
 * the constant is named after.
 */
public enum Profile {
    NONE(cp -> true),
    ALPHA(cp -> !CharUtils.isAlpha(cp)),
    ALPHANUM(cp -> !CharUtils.isAlphaDigit(cp)),
    FRAGMENT(cp -> !CharUtils.isFragment(cp)),
    IFRAGMENT(cp -> !CharUtils.isIfragment(cp)),
    PATH(cp -> !CharUtils.isPath(cp)),
    IPATH(cp -> !CharUtils.isIpath(cp)),
    IUSERINFO(cp -> !CharUtils.isIuserinfo(cp)),
    USERINFO(cp -> !CharUtils.isUserInfo(cp)),
    QUERY(cp -> !CharUtils.isQuery(cp)),
    IQUERY(cp -> !CharUtils.isIquery(cp)),
    SCHEME(cp -> !CharUtils.isScheme(cp)),
    PATHNODELIMS(cp -> !CharUtils.isPathNoDelims(cp)),
    IPATHNODELIMS(cp -> !CharUtils.isIpathnodelims(cp)),
    IPATHNODELIMS_SEG(cp -> !(CharUtils.isIpathnodelims(cp) || cp == '@' || cp == ':')),
    IREGNAME(cp -> !CharUtils.isIregname(cp)),
    IHOST(cp -> !CharUtils.isIhost(cp)),
    IPRIVATE(cp -> !CharUtils.isIprivate(cp)),
    RESERVED(cp -> !CharUtils.isReserved(cp)),
    IUNRESERVED(cp -> !CharUtils.isIunreserved(cp)),
    UNRESERVED(cp -> !CharUtils.isUnreserved(cp)),
    SCHEMESPECIFICPART(cp -> !(CharUtils.isIunreserved(cp)
            || CharUtils.isReserved(cp)
            || CharUtils.isIprivate(cp)
            || CharUtils.isPctEnc(cp)
            || cp == '#')),
    AUTHORITY(cp -> !(CharUtils.isRegname(cp) || CharUtils.isUserInfo(cp) || CharUtils.isGenDelim(cp))),
    ASCIISANSCRLF(cp -> !(CharUtils.inRange(cp, 1, 9) || CharUtils.inRange(cp, 14, 127))),
    PCT(cp -> !CharUtils.isPctEnc(cp)),
    STD3ASCIIRULES(cp -> !(CharUtils.inRange(cp, 0x0000, 0x002C)
            || CharUtils.inRange(cp, 0x002E, 0x002F)
            || CharUtils.inRange(cp, 0x003A, 0x0040)
            || CharUtils.inRange(cp, 0x005B, 0x0060)
            || CharUtils.inRange(cp, 0x007B, 0x007F)));

    private final CodepointFilter filter;

    Profile(CodepointFilter filter) {
        this.filter = filter;
    }

    /**
     * Returns the filter of this profile.
     * @return the filter
     */
    public CodepointFilter filter() {
        return filter;
    }

    /**
     * Applies this profile's filter to a codepoint.
     * @param codepoint codepoint to test
     * @return the result of the filter's {@code accept} method
     */
    public boolean check(int codepoint) {
        return filter.accept(codepoint);
    }
}

View file

@ -0,0 +1,83 @@
package org.xbib.net.util;
/**
 * A delegating iterator that checks every codepoint against a {@link CodepointFilter}.
 * A codepoint is flagged when the filter's verdict differs from {@code notset}
 * (so with {@code notset == false}, a {@code true} verdict flags the codepoint).
 * In scanning mode a flagged codepoint ends the iteration quietly; otherwise an
 * {@link InvalidCharacterException} is thrown.
 */
class RestrictedCodepointIterator extends DelegatingCodepointIterator {

    private final CodepointFilter filter;

    private final boolean scanningOnly;

    private final boolean notset;

    RestrictedCodepointIterator(CodepointIterator internal, CodepointFilter filter, boolean scanningOnly) {
        this(internal, filter, scanningOnly, false);
    }

    /**
     * @param internal iterator to wrap
     * @param filter filter consulted for every codepoint
     * @param scanningOnly true to stop at a flagged codepoint instead of throwing
     * @param notset true to invert the filter's verdict
     */
    RestrictedCodepointIterator(CodepointIterator internal,
                                CodepointFilter filter,
                                boolean scanningOnly,
                                boolean notset) {
        super(internal);
        this.filter = filter;
        this.scanningOnly = scanningOnly;
        this.notset = notset;
    }

    /**
     * In scanning mode this also returns false when the upcoming codepoint is
     * flagged or cannot be decoded.
     * @return true if another acceptable codepoint is available
     */
    @Override
    public boolean hasNext() {
        boolean more = super.hasNext();
        // Only peek when something is left: at the end of the input peek() returns
        // null and must not be dereferenced.
        if (!scanningOnly || !more) {
            return more;
        }
        try {
            Codepoint upcoming = super.peek(super.position());
            if (upcoming != null) {
                int cp = upcoming.getValue();
                if (cp != -1 && check(cp)) {
                    return false;
                }
            }
        } catch (InvalidCharacterException e) {
            // An undecodable surrogate sequence ends the scan.
            return false;
        }
        return true;
    }

    /**
     * Returns the next codepoint after checking it.
     * @return the codepoint, or null in scanning mode when it is flagged (the position is rewound)
     * @throws InvalidCharacterException in non-scanning mode when the codepoint is flagged
     */
    @Override
    public Codepoint next() {
        Codepoint cp = super.next();
        int v = cp.getValue();
        if (v != -1 && check(v)) {
            if (!scanningOnly) {
                throw new InvalidCharacterException(v);
            }
            // Rewind by the number of chars the codepoint occupies (two for a
            // supplementary codepoint), as nextChars() does.
            super.position(super.position() - Character.charCount(v));
            return null;
        }
        return cp;
    }

    /** Returns true if the codepoint is flagged, taking the inversion flag into account. */
    private boolean check(int cp) {
        return notset == !filter.accept(cp);
    }

    /**
     * Returns the chars of the next codepoint after checking it.
     * @return the chars, or null when nothing remains or, in scanning mode, when the codepoint is flagged
     * @throws InvalidCharacterException in non-scanning mode when the codepoint is flagged
     */
    @Override
    public char[] nextChars() {
        char[] chars = super.nextChars();
        if (chars != null && chars.length > 0) {
            if (chars.length == 1 && check(chars[0])) {
                if (scanningOnly) {
                    super.position(super.position() - 1);
                    return null;
                } else {
                    throw new InvalidCharacterException(chars[0]);
                }
            } else if (chars.length == 2) {
                int cp = CharUtils.toSupplementary(chars[0], chars[1]).getValue();
                if (check(cp)) {
                    if (scanningOnly) {
                        super.position(super.position() - 2);
                        return null;
                    } else {
                        throw new InvalidCharacterException(cp);
                    }
                }
            }
        }
        return chars;
    }
}

View file

@ -0,0 +1,202 @@
package org.xbib.net;
import java.net.URI;
import java.net.URISyntaxException;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@Disabled
public class OtherIRITest {

    /** Parses {@code iri} and asserts the string form of its URI conversion. */
    private static void assertUri(String expectedUri, String iri) throws Exception {
        IRI parsed = IRI.create(iri);
        assertEquals(expectedUri, parsed.toURI().toString());
    }

    /** Asserts all seven components of an IRI, in the order scheme to fragment. */
    private static void assertComponents(IRI iri, String scheme, String userInfo, String host, int port,
                                         String path, String query, String fragment) {
        assertEquals(scheme, iri.getScheme());
        assertEquals(userInfo, iri.getUserInfo());
        assertEquals(host, iri.getHost());
        assertEquals(port, iri.getPort());
        assertEquals(path, iri.getPath());
        assertEquals(query, iri.getQuery());
        assertEquals(fragment, iri.getFragment());
    }

    @Test
    public void testSimple() throws Exception {
        IRI parsed = IRI.create("http://validator.w3.org/check?uri=http%3A%2F%2Fr\u00E9sum\u00E9.example.org");
        assertEquals("http", parsed.getScheme());
        assertEquals("validator.w3.org", parsed.getHost());
        assertEquals("/check", parsed.getPath());
        assertEquals("//validator.w3.org/check?uri=http%3A%2F%2Frésumé.example.org", parsed.getSchemeSpecificPart());
        assertEquals("http://validator.w3.org/check?uri=http%3A%2F%2Fr\u00E9sum\u00E9.example.org", parsed.toString());
        assertEquals("http://validator.w3.org/check?uri=http%3A%2F%2Fr%C3%A9sum%C3%A9.example.org", parsed.toURI().toString());
    }

    @Test
    public void testIpv4() throws Exception {
        assertUri("http://127.0.0.1", "http://127.0.0.1");
    }

    @Test
    public void testIpv6() throws Exception {
        assertUri("http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]",
                "http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]");
    }

    @Test
    public void testUnderscore() throws Exception {
        assertUri("http://its_gbsc.cn.ibm.com/", "http://its_gbsc.cn.ibm.com/");
    }

    @Test
    public void testIpv6Invalid() throws URISyntaxException {
        // Only exercises the conversion; nothing is asserted about the result.
        IRI invalid = IRI.create("http://[2001:0db8:85a3:08d3:1319:8a2e:0370:734o]");
        invalid.toURI().toString();
    }

    @Test
    public void testFile() throws Exception {
        assertUri("file:///tmp/test/foo", "file:///tmp/test/foo");
    }

    @Test
    public void testSimple2() throws Exception {
        assertUri("http://www.example.org/red%09ros%C3%A9#red", "http://www.example.org/red%09ros\u00E9#red");
    }

    @Test
    public void testNotSoSimple() throws Exception {
        assertUri("http://example.com/%F0%90%8C%80%F0%90%8C%81%F0%90%8C%82",
                "http://example.com/\uD800\uDF00\uD800\uDF01\uD800\uDF02");
    }

    @Test
    public void testIRItoURI() throws Exception {
        IRI parsed = IRI.create("http://\u7D0D\u8C46.example.org/%E2%80%AE");
        URI converted = parsed.toURI();
        assertEquals("http://xn--99zt52a.example.org/%E2%80%AE", converted.toString());
    }

    @Test
    public void testComparison() throws Exception {
        IRI[] iris = {
            IRI.create("http://www.example.org/"),
            IRI.create("http://www.example.org/.."),
            IRI.create("http://www.Example.org:80")
        };
        // No two distinct entries may be equal without normalization.
        for (int i = 0; i < iris.length; i++) {
            for (int j = 0; j < iris.length; j++) {
                if (i != j) {
                    assertFalse(iris[i].equals(iris[j]));
                }
            }
        }
        /*assertTrue(iri1.normalize().equals(iri2.normalize()));
        assertTrue(iri1.normalize().equals(iri3.normalize()));
        assertTrue(iri2.normalize().equals(iri1.normalize()));
        assertTrue(iri2.normalize().equals(iri3.normalize()));
        assertTrue(iri3.normalize().equals(iri1.normalize()));
        assertTrue(iri3.normalize().equals(iri2.normalize()));*/
    }

    @Test
    public void testUCN() throws Exception {
        //IRI iri1 = IRI.create("http://www.example.org/r\u00E9sum\u00E9.html");
        //IRI iri2 = IRI.create("http://www.example.org/re\u0301sume\u0301.html", Normalizer.Form.NFC);
        //assertEquals(iri2, iri1);
    }

    @Test
    public void testPercent() throws Exception {
        // Both IRIs are only parsed; the comparison below is disabled.
        IRI first = IRI.create("http://example.org/%7e%2Fuser?%2f");
        IRI second = IRI.create("http://example.org/%7E%2fuser?/");
        //assertTrue(iri1.normalize().equals(iri2.normalize()));
    }

    @Test
    public void testIDN() throws Exception {
        IRI parsed = IRI.create("http://r\u00E9sum\u00E9.example.org");
        assertEquals("xn--rsum-bpad.example.org", parsed.getASCIIHost());
    }

    @Test
    public void testRelative() throws Exception {
        IRI base = IRI.create("http://example.org/foo/");
        // Each row: reference to resolve, expected result.
        String[][] cases = {
            {"/", "http://example.org/"},
            {"/test", "http://example.org/test"},
            {"test", "http://example.org/foo/test"},
            {"../test", "http://example.org/test"},
            {"./test", "http://example.org/foo/test"},
            {"test/test/../../", "http://example.org/foo/"},
            {"?test", "http://example.org/foo/?test"},
            {"#test", "http://example.org/foo/#test"},
            {".", "http://example.org/foo/"}
        };
        for (String[] c : cases) {
            assertEquals(c[1], base.resolve(c[0]).toString());
        }
    }

    /**
     * Try a variety of URI schemes. If any problematic schemes pop up, we should add a test for 'em here
     */
    @Test
    public void testSchemes() throws Exception {
        assertComponents(IRI.create("http://a:b@c.org:80/d/e?f#g"),
                "http", "a:b", "c.org", 80, "/d/e", "f", "g");
        assertComponents(IRI.create("https://a:b@c.org:80/d/e?f#g"),
                "https", "a:b", "c.org", 80, "/d/e", "f", "g");
        assertComponents(IRI.create("ftp://a:b@c.org:80/d/e?f#g"),
                "ftp", "a:b", "c.org", 80, "/d/e", "f", "g");
        assertComponents(IRI.create("mailto:joe@example.org?subject=foo"),
                "mailto", null, null, -1, "joe@example.org", "subject=foo", null);
        assertComponents(IRI.create("tag:example.org,2006:foo"),
                "tag", null, null, -1, "example.org,2006:foo", null, null);
        assertComponents(IRI.create("urn:lsid:ibm.com:example:82437234964354895798234d"),
                "urn", null, null, -1, "lsid:ibm.com:example:82437234964354895798234d", null, null);
        assertComponents(IRI.create("data:image/gif;base64,R0lGODdhMAAwAPAAAAAAAP"),
                "data", null, null, -1, "image/gif;base64,R0lGODdhMAAwAPAAAAAAAP", null, null);
    }
}